Compare commits

..

1172 Commits

Author SHA1 Message Date
Gonglei
3e10c3ecfc vnc: fix qemu crash because of SIGSEGV
The backtrace is:

0x00007f0b75cdf880 in pixman_image_get_stride () from /lib64/libpixman-1.so.0
0x00007f0b77bcb3cf in vnc_server_fb_stride (vd=0x7f0b7a1a2bb0) at ui/vnc.c:680
vnc_dpy_copy (dcl=0x7f0b7a1a2c00, src_x=224, src_y=263, dst_x=319, dst_y=363, w=1, h=1) at ui/vnc.c:915
0x00007f0b77bbcc35 in dpy_gfx_copy (con=0x7f0b7a146210, src_x=src_x@entry=224, src_y=src_y@entry=263, dst_x=dst_x@entry=319,
dst_y=dst_y@entry=363, w=1, h=1) at ui/console.c:1575
0x00007f0b77bbda4e in qemu_console_copy (con=<optimized out>, src_x=src_x@entry=224, src_y=src_y@entry=263, dst_x=dst_x@entry=319,
dst_y=dst_y@entry=363, w=<optimized out>, h=<optimized out>) at ui/console.c:2111
0x00007f0b77ac0980 in cirrus_do_copy (h=<optimized out>, w=<optimized out>, src=<optimized out>, dst=<optimized out>, s=0x7f0b7b086090) at hw/display/cirrus_vga.c:774
cirrus_bitblt_videotovideo_copy (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:793
cirrus_bitblt_videotovideo (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:915
cirrus_bitblt_start (s=0x7f0b7b086090) at hw/display/cirrus_vga.c:1056
0x00007f0b77965cfb in memory_region_write_accessor (mr=0x7f0b7b096e40, addr=320, value=<optimized out>, size=1, shift=<optimized out>,mask=<optimized out>, attrs=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:525
0x00007f0b77963f59 in access_with_adjusted_size (addr=addr@entry=320, value=value@entry=0x7f0b69a268d8, size=size@entry=4,
access_size_min=<optimized out>, access_size_max=<optimized out>, access=access@entry=0x7f0b77965c80 <memory_region_write_accessor>,
mr=mr@entry=0x7f0b7b096e40, attrs=attrs@entry=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:591
0x00007f0b77968315 in memory_region_dispatch_write (mr=mr@entry=0x7f0b7b096e40, addr=addr@entry=320, data=18446744073709551362,
size=size@entry=4, attrs=attrs@entry=...) at /root/rpmbuild/BUILD/master/qemu/memory.c:1262
0x00007f0b779256a9 in address_space_write_continue (mr=0x7f0b7b096e40, l=4, addr1=320, len=4, buf=0x7f0b77713028 "\002\377\377\377",
attrs=..., addr=4273930560, as=0x7f0b7827d280 <address_space_memory>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2544
address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2601
0x00007f0b77925c1d in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=..., attrs@entry=...,
buf=buf@entry=0x7f0b77713028 "\002\377\377\377", len=<optimized out>, is_write=<optimized out>) at /root/rpmbuild/BUILD/master/qemu/exec.c:2703
0x00007f0b77962f53 in kvm_cpu_exec (cpu=cpu@entry=0x7f0b79fcc2d0) at /root/rpmbuild/BUILD/master/qemu/kvm-all.c:1965
0x00007f0b77950cc6 in qemu_kvm_cpu_thread_fn (arg=0x7f0b79fcc2d0) at /root/rpmbuild/BUILD/master/qemu/cpus.c:1078
0x00007f0b744b3dc5 in start_thread (arg=0x7f0b69a27700) at pthread_create.c:308
0x00007f0b70d3d66d in clone () from /lib64/libc.so.6

The code path while meeting segfault:
 vnc_dpy_copy
   vnc_update_client
     vnc_disconnect_finish [while vnc_disconnect_start() is invoked because somethins wrong]
       vnc_update_server_surface
         vd->server = NULL;
   vnc_server_fb_stride
     pixman_image_get_stride(vd->server)

Let's add a non-NULL check before calling vnc_server_fb_stride() to avoid segmentation fault.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Daniel P. Berrange <berrange@redhat.com>
Reported-by: Yanying Zhuang <ann.zhuangyanying@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1472788698-120964-1-git-send-email-arei.gonglei@huawei.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Li Zhijian
93ca519ec4 qemu-options.hx: correct spice options streaming-video default document value to 'off'
since f1d3e58, the code had changed the default value to 'off', so this patch
make document and code are consistent.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Message-id: 1470024419-10886-1-git-send-email-lizhijian@cn.fujitsu.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Peter Maydell
99a9ef44dc ui/curses.c: Clean up nextchr logic
Coverity identifies that at the top of the while(1) loop
in curses_refresh() the variable nextchr is always ERR,
and so the else case of the first if() is dead code.
Remove this dead code, and narrow the scope of the
nextchr variable to the place where it's used.

(This confused logic has been present since the curses
code was added to QEMU in 2008.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1470925407-23850-3-git-send-email-peter.maydell@linaro.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Peter Maydell
bba4e1b591 ui/curses.c: Ensure we don't read off the end of curses2qemu array
Coverity spots that there is no bounds check before we
access the curses2qemu[] array.  Add one, bringing this
code path into line with the one that looks up entries
in curses2keysym[].

In theory getch() shouldn't return out of range keycodes,
but it's better not to assume this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1470925407-23850-2-git-send-email-peter.maydell@linaro.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-09-13 08:01:39 +02:00
Peter Maydell
7263da7804 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Mon 12 Sep 2016 11:51:09 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to c5542f2 built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-12 15:09:47 +01:00
Peter Maydell
d4c61988b8 Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2016-09-12-1' into staging
Merge qcrypto 2016/09/12 v1

# gpg: Signature made Mon 12 Sep 2016 12:02:20 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2016-09-12-1:
  crypto: report enum strings instead of values in errors
  crypto: fix building complaint
  crypto: ensure XTS is only used with ciphers with 16 byte blocks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-12 12:48:47 +01:00
Daniel P. Berrange
90d6f60d07 crypto: report enum strings instead of values in errors
Several error messages print out the raw enum value, which
is less than helpful to users, as these values are not
documented, nor stable across QEMU releases. Switch to use
the enum string instead.

The nettle impl also had two typos where it mistakenly
said "algorithm" instead of "mode", and actually reported
the algorithm value too.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-09-12 12:00:52 +01:00
Gonglei
d9269b274a crypto: fix building complaint
gnutls commit 846753877d renamed LIBGNUTLS_VERSION_NUMBER to GNUTLS_VERSION_NUMBER.
If using gnutls before that verion, we'll get the below warning:
crypto/tlscredsx509.c:618:5: warning: "GNUTLS_VERSION_NUMBER" is not defined

Because gnutls 3.x still defines LIBGNUTLS_VERSION_NUMBER for back compat, Let's
use LIBGNUTLS_VERSION_NUMBER instead of GNUTLS_VERSION_NUMBER to fix building
complaint.

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-09-12 12:00:52 +01:00
Daniel P. Berrange
a5d2f44d0d crypto: ensure XTS is only used with ciphers with 16 byte blocks
The XTS cipher mode needs to be used with a cipher which has
a block size of 16 bytes. If a mis-matching block size is used,
the code will either corrupt memory beyond the IV array, or
not fully encrypt/decrypt the IV.

This fixes a memory corruption crash when attempting to use
cast5-128 with xts, since the former has an 8 byte block size.

A test case is added to ensure the cipher creation fails with
such an invalid combination.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-09-12 12:00:06 +01:00
Peter Maydell
c569c537e5 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio,vhost,pc: fixes and updates

balloon fixes wrt migration
virtio-vsock device support

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 09 Sep 2016 22:36:13 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  vhost-vsock: add virtio sockets device
  tests/acpi: speedup acpi tests
  virtio-pci: minor refactoring
  vhost: don't set vring call if no vector
  virtio-pci: error out when both legacy and modern modes are disabled
  virtio-balloon: fix stats vq migration
  virtio: add virtqueue_rewind()
  virtio-balloon: discard virtqueue element on reset
  virtio: zero vq->inuse in virtio_reset()
  virtio-pci: reduce modern_mem_bar size
  target-i386: present virtual L3 cache info for vcpus
  pc: Add 2.8 machine
  virtio-pci: use size from correct structure
  virtio: Tell the user what went wrong when event_notifier_init failed

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-12 11:25:40 +01:00
Mark Cave-Ayland
a26f7f2cb8 Update OpenBIOS images to c5542f2 built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-09-12 08:14:50 +01:00
Stefan Hajnoczi
fc0b9b0e1c vhost-vsock: add virtio sockets device
Implement the new virtio sockets device for host<->guest communication
using the Sockets API.  Most of the work is done in a vhost kernel
driver so that virtio-vsock can hook into the AF_VSOCK address family.
The QEMU vhost-vsock device handles configuration and live migration
while the rx/tx happens in the vhost_vsock.ko Linux kernel driver.

The vsock device must be given a CID (host-wide unique address):

  # qemu -device vhost-vsock-pci,id=vhost-vsock-pci0,guest-cid=3 ...

For more information see:
http://qemu-project.org/Features/VirtioVsock

[Endianness fixes and virtio-ccw support by Claudio Imbrenda
<imbrenda@linux.vnet.ibm.com>]

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
[mst: rebase to master]
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-10 00:28:08 +03:00
Marcel Apfelbaum
947b205fdb tests/acpi: speedup acpi tests
Use kvm acceleration if available.
Disable kernel-irqchip and use qemu64 cpu
for both kvm and tcg cases.

Using kvm acceleration saves about a second
and disabling kernel-irqchip has no visible
performance impact.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-10 00:08:28 +03:00
Michael S. Tsirkin
71d19fc513 virtio-pci: minor refactoring
!legacy && !modern is shorter than !(legacy || modern).
I also perfer this (less ()s) as a matter of taste.

Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Jason Wang
96a3d98d2c vhost: don't set vring call if no vector
We used to set vring call fd unconditionally even if guest driver does
not use MSIX for this vritqueue at all. This will cause lots of
unnecessary userspace access and other checks for drivers does not use
interrupt at all (e.g virtio-net pmd). So check and clean vring call
fd if guest does not use any vector for this virtqueue at
all.

Perf diffs (on rx) shows lots of cpus wasted on vhost_signal() were saved:

#
    28.12%  -27.82%  [vhost]           [k] vhost_signal
    14.44%   -1.69%  [kernel.vmlinux]  [k] copy_user_generic_string
     7.05%   +1.53%  [kernel.vmlinux]  [k] __free_page_frag
     6.51%   +5.53%  [vhost]           [k] vhost_get_vq_desc
...

Pktgen tests shows 15.8% improvement on rx pps and 6.5% on tx pps.

Before: RX 2.08Mpps TX 1.35Mpps
After:  RX 2.41Mpps TX 1.44Mpps

Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Greg Kurz
3eff376977 virtio-pci: error out when both legacy and modern modes are disabled
Without presuming if we got there because of a user mistake or some
more subtle bug in the tooling, it really does not make sense to
implement a non-functional device.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Ladi Prosek
4a1e48beca virtio-balloon: fix stats vq migration
The statistics virtqueue is not migrated properly because virtio-balloon
does not include s->stats_vq_elem in the migration stream.

After migration the statistics virtqueue hangs because the host never
completes the last element (s->stats_vq_elem is NULL on the destination
QEMU).  Therefore the guest never submits new elements and the virtqueue
is hung.

Instead of changing the migration stream format in an incompatible way,
detect the migration case and rewind the virtqueue so the last element
can be completed.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Suggested-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Stefan Hajnoczi
297a75e6c5 virtio: add virtqueue_rewind()
virtqueue_discard() requires a VirtQueueElement but virtio-balloon does
not migrate its in-use element.  Introduce a new function that is
similar to virtqueue_discard() but doesn't require a VirtQueueElement.

This will allow virtio-balloon to access element again after migration
with the usual proviso that the guest may have modified the vring since
last time.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Ladi Prosek
104e70cae7 virtio-balloon: discard virtqueue element on reset
The one pending element is being freed but not discarded on device
reset, which causes svq->inuse to creep up, eventually hitting the
"Virtqueue size exceeded" error.

Properly discarding the element on device reset makes sure that its
buffers are unmapped and the inuse counter stays balanced.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Roman Kagan <rkagan@virtuozzo.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Stefan Hajnoczi
4b7f91ed02 virtio: zero vq->inuse in virtio_reset()
vq->inuse must be zeroed upon device reset like most other virtqueue
fields.

In theory, virtio_reset() just needs assert(vq->inuse == 0) since
devices must clean up in-flight requests during reset (requests cannot
not be leaked!).

In practice, it is difficult to achieve vq->inuse == 0 across reset
because balloon, blk, 9p, etc implement various different strategies for
cleaning up requests.  Most devices call g_free(elem) directly without
telling virtio.c that the VirtQueueElement is cleaned up.  Therefore
vq->inuse is not decremented during reset.

This patch zeroes vq->inuse and trusts that devices are not leaking
VirtQueueElements across reset.

I will send a follow-up series that refactors request life-cycle across
all devices and converts vq->inuse = 0 into assert(vq->inuse == 0) but
this more invasive approach is not appropriate for stable trees.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Cc: qemu-stable <qemu-stable@nongnu.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ladi Prosek <lprosek@redhat.com>
2016-09-09 20:58:34 +03:00
Marcel Apfelbaum
d9997d89a4 virtio-pci: reduce modern_mem_bar size
Currently each VQ Notification Virtio Capability is allocated
on a different page. The idea is to enable split drivers within
guests, however there are no known plans to do that.
The allocation will result in a 8MB BAR, more than various
guest firmwares pre-allocates for PCI Bridges hotplug process.

Reserve 4 bytes per VQ by default and add a new parameter
"page-per-vq" to be used with split drivers.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Longpeng(Mike)
14c985cffa target-i386: present virtual L3 cache info for vcpus
Some software algorithms are based on the hardware's cache info, for example,
for x86 linux kernel, when cpu1 want to wakeup a task on cpu2, cpu1 will trigger
a resched IPI and told cpu2 to do the wakeup if they don't share low level
cache. Oppositely, cpu1 will access cpu2's runqueue directly if they share llc.
The relevant linux-kernel code as bellow:

	static void ttwu_queue(struct task_struct *p, int cpu)
	{
		struct rq *rq = cpu_rq(cpu);
		......
		if (... && !cpus_share_cache(smp_processor_id(), cpu)) {
			......
			ttwu_queue_remote(p, cpu); /* will trigger RES IPI */
			return;
		}
		......
		ttwu_do_activate(rq, p, 0); /* access target's rq directly */
		......
	}

In real hardware, the cpus on the same socket share L3 cache, so one won't
trigger a resched IPIs when wakeup a task on others. But QEMU doesn't present a
virtual L3 cache info for VM, then the linux guest will trigger lots of RES IPIs
under some workloads even if the virtual cpus belongs to the same virtual socket.

For KVM, there will be lots of vmexit due to guest send IPIs.
The workload is a SAP HANA's testsuite, we run it one round(about 40 minuates)
and observe the (Suse11sp3)Guest's amounts of RES IPIs which triggering during
the period:
        No-L3           With-L3(applied this patch)
cpu0:	363890		44582
cpu1:	373405		43109
cpu2:	340783		43797
cpu3:	333854		43409
cpu4:	327170		40038
cpu5:	325491		39922
cpu6:	319129		42391
cpu7:	306480		41035
cpu8:	161139		32188
cpu9:	164649		31024
cpu10:	149823		30398
cpu11:	149823		32455
cpu12:	164830		35143
cpu13:	172269		35805
cpu14:	179979		33898
cpu15:	194505		32754
avg:	268963.6	40129.8

The VM's topology is "1*socket 8*cores 2*threads".
After present virtual L3 cache info for VM, the amounts of RES IPIs in guest
reduce 85%.

For KVM, vcpus send IPIs will cause vmexit which is expensive, so it can cause
severe performance degradation. We had tested the overall system performance if
vcpus actually run on sparate physical socket. With L3 cache, the performance
improves 7.2%~33.1%(avg:15.7%).

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Longpeng(Mike)
a4d3c83476 pc: Add 2.8 machine
This will used by the next patch.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Michael S. Tsirkin
e3aab6c7f3 virtio-pci: use size from correct structure
PIO MR registration should use size from the correct notify struct.
Doesn't affect any visible behaviour because the field values are the
same (both are 4).

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Thomas Huth
a8bba0ada4 virtio: Tell the user what went wrong when event_notifier_init failed
event_notifier_init() can fail in real life, for example when there
are not enough open file handles available (EMFILE) when using a lot
of devices. So instead of leaving the average user with a cryptic
error number only, print out a proper error message with strerror()
instead, so that the user has a better way to figure out what is
going on and that using "ulimit -n" might help here for example.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-09 20:58:34 +03:00
Peter Maydell
c2a57aae9a Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Fri 09 Sep 2016 05:54:35 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker: silence debootstrap when --quiet is given
  docker: build debootstrap after cloning
  docker: make sure debootstrap is at least 1.0.67
  docker: print warning if EXECUTABLE is not set when building debootstrap image
  docker: debian-bootstrap.pre: print helpful message if DEB_ARCH/DEB_TYPE unset
  docker: debian-bootstrap.pre: print error messages to stderr
  docker: avoid dependency on 'realpath' package
  docker.py: don't hang on large docker output
  docker: Add a glib2-2.22 image

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-09 12:49:41 +01:00
Peter Maydell
5f31bbf101 qtest.c: Allow zero size in memset qtest commands
Some tests use the qtest protocol "memset" command with a zero
size, expecting it to do nothing. However in the current code this
will result in calling memset() with a NULL pointer, which is
undefined behaviour. Detect and specially handle zero sizes to
avoid this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1470393800-7882-1-git-send-email-peter.maydell@linaro.org
2016-09-09 11:16:18 +01:00
Peter Maydell
33e60e0198 Merge remote-tracking branch 'remotes/elmarco/tags/leak-pull-request' into staging
Pull request

v2:
- dropped "tests: fix small leak in test-io-channel-command" that Daniel Berrange will pick
- fixed "tests: add qtest_add_data_func_full" to work with glib < 2.26

# gpg: Signature made Thu 08 Sep 2016 15:16:54 BST
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leak-pull-request: (25 commits)
  tests: fix postcopy-test leaks
  tests: fix rsp leak in postcopy-test
  tests: pc-cpu-test leaks fixes
  tests: add qtest_add_data_func_full
  bus: simplify name handling
  ipmi: free extern timer
  sd: free timer
  pc: keep gsi reference
  pc: free i8259
  tests: fix qom-test leaks
  acpi-build: fix array leak
  machine: use class base init generated name
  pc: don't leak a20_line
  pc: simplify passing qemu_irq
  portio: keep references on portio
  tests: fix leak in test-string-input-visitor
  tests: fix check-qom-proplist leaks
  tests: fix check-qom-interface leaks
  tests: fix test-iov leaks
  tests: fix test-vmstate leaks
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-08 15:22:50 +01:00
Marc-André Lureau
e2dd21e510 tests: fix postcopy-test leaks
A few strings are allocated and never freed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
5b1ded224f tests: fix rsp leak in postcopy-test
In all cases, even when the dict doesn't contain 'ram', the qmp response
must be unref.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
34e46f604d tests: pc-cpu-test leaks fixes
The path is allocated and should be freed.

The qmp response should be unref, but then 'machine' must be duplicated.

Use a destroy function for the PCTestData.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
822e36ca35 tests: add qtest_add_data_func_full
Allows one to specify a destroy function for the test data.

Add a fallback using glib g_test_add_vtable() internal function, whose
signature changed over time. Tested with glib 2.22, 2.26 and 2.48, which
according to git log should be enough to cover all variations.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
f73480c36f bus: simplify name handling
Simplify a bit the code by using g_strdup_printf() and store it in a
non-const value so casting is no longer needed, and ownership is
clearer.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
e9529768d4 ipmi: free extern timer
Free the timer allocated during instance init.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Corey Minyard <cminyard@mvista.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
5ba344013c sd: free timer
Free the timer allocated in instance_init.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Andrew Baumann <Andrew.Baumann@microsoft.com>
2016-09-08 18:05:22 +04:00
Marc-André Lureau
3e6c0c4c2c pc: keep gsi reference
Further cleanup would need to call qemu_free_irq() at the appropriate
time, but for now this silences ASAN about direct leaks.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
8197e24c38 pc: free i8259
Simiarly to 2ba154cf4e

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
ff1685a333 tests: fix qom-test leaks
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
354fb471bd acpi-build: fix array leak
The free_ranges array is used as a temporary pointer array, the segment
should still be freed, however, it shouldn't free the elements themself.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
8ea753718b machine: use class base init generated name
machine_class_base_init() member name is allocated by
machine_class_base_init(), but not freed by
machine_class_finalize().  Simply freeing there doesn't work,
because DEFINE_PC_MACHINE() overwrites it with a literal string.

Fix DEFINE_PC_MACHINE() not to overwrite it, and add the missing
free to machine_class_finalize().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
ac64c5fdf8 pc: don't leak a20_line
The irqs array is no longer being used

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
d80fe99de4 pc: simplify passing qemu_irq
qemu_irq is already a pointer, no need to have an extra pointer level.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
e305a16510 portio: keep references on portio
The isa_register_portio_list() function allocates ioports
data/state. Let's keep the reference to this data on some owner.  This
isn't enough to fix leaks, but at least, ASAN stops complaining of
direct leaks. Further cleanup would require calling
portio_list_del/destroy().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
bd794065ff tests: fix leak in test-string-input-visitor
Free the list returned by visit_type_intList().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 18:05:21 +04:00
Marc-André Lureau
3972a4884d tests: fix check-qom-proplist leaks
Found thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
265804b5d7 tests: fix check-qom-interface leaks
Found thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
d55f295b2b tests: fix test-iov leaks
Spotted thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
4ae3c0e27f tests: fix test-vmstate leaks
Spotted thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
d6f723b513 tests: fix test-cutils leaks
Spotted thanks to ASAN.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
3e3e302ff3 qga: free remaining leaking state
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
2aa67a9196 qga: free the whole blacklist
Free the config blacklist list, not just the elements. Do it so in the
more appropriate function config_free().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
5c7e3e9fb1 glib-compat: add g_(s)list_free_full()
Those functions are only available since glib 2.28.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-09-08 17:57:32 +04:00
Marc-André Lureau
1e2713384c tests: fix test-qga leaks
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-08 17:57:32 +04:00
Sascha Silbe
f8042deafa docker: silence debootstrap when --quiet is given
If we silence docker when --quiet is given, we should also silence the
.pre script (i.e. debootstrap).

Only discards stdout, so some diagnostics (e.g. from git clone) are
still printed. Most of the verbose output is gone however and this way
we still have a chance to see error messages.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-9-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
ae2f659ca5 docker: build debootstrap after cloning
When using the git version of debootstrap (because no usable version
of debootstrap was installed on the host), we need to run 'make' so
that devices.tar.gz gets built. Otherwise the first debootstrap stage
will fail without printing any error message.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-8-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
00263139f8 docker: make sure debootstrap is at least 1.0.67
debootstrap prior to 1.0.67 generated an empty sources.list during
foreign bootstraps (Debian#732255 [1]). Fall back to the git checkout
if the installed debootstrap version is too old.

[1] https://bugs.debian.org/732255

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-7-git-send-email-silbe@linux.vnet.ibm.com>
[Update 'sort -C' to 'sorc -c &>/dev/null' - Fam]
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
a351b4b06e docker: print warning if EXECUTABLE is not set when building debootstrap image
Building the debian-debootstrap image will usually fail if EXECUTABLE
isn't set (when using the Makefile). Warn the user in this case so
they know why it's failing.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-6-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
341edc0c47 docker: debian-bootstrap.pre: print helpful message if DEB_ARCH/DEB_TYPE unset
The debian-bootstrap image doesn't choose a default architecture and
distribution version, instead the user has to set both DEB_ARCH and
DEB_TYPE in the environment. Print a reasonably helpful message if
either of them isn't set instead of complaining about "qemu-" being
missing or erroring out because we cannot cd to the mirror URL.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-5-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
b5dc88ce24 docker: debian-bootstrap.pre: print error messages to stderr
Send error messages where they belong so they're seen even if stdout
is redirected to /dev/null.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-4-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
08f4e8d23d docker: avoid dependency on 'realpath' package
The 'realpath' executable is shipped in a separate package that isn't
installed by default on some distros.

We already use 'readlink -e' (provided by GNU coreutils) in some other
part of the code, so let's settle for that instead.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-Id: <1473192351-601-3-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Sascha Silbe
c977257045 docker.py: don't hang on large docker output
Unlike Popen.communicate(), subprocess.call() doesn't read from the
stdout file descriptor. If the child process produces more output than
fits into the pipe buffer, it will block indefinitely.

If we don't intend to consume the output, just send it straight to
/dev/null to avoid this issue.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Message-Id: <1473192351-601-2-git-send-email-silbe@linux.vnet.ibm.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-09-08 19:56:34 +08:00
Fam Zheng
9af4c174a3 docker: Add a glib2-2.22 image
It's a variation of our existing centos6, plus two more lines to
downgrade glib2 to version 2.22 which we download from vault.centos.org.

Suggested-by: Paolo Bonzini <pbonzoni@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470708908-12885-1-git-send-email-famz@redhat.com>
2016-09-08 19:56:34 +08:00
Peter Maydell
59351d9b40 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.8-20160907' into staging
ppc patch queue for 2016-Sep-7

This is my first pull request for the newly opened qemu-2.8 tree.  It
contains a heap of things that were too late for 2.7 and have been
queued for a while.  In particular:
    * A number of preliminary patches for the powernv machine type
        * A substantial cleanup of exception handling which will be
          necessary to support running a TCG with hypervisor
          facilities
    * A start on support for POWER9
        * Some TCG implementations for new POWER9 instructions
        * Some TCG and related cleanups in preparation for POWER9
    * Some assorted TCG optimizations
    * An implementation of the H_CHANGE_LOGICAL_LAN_MAC hypercall
      which allows the MAC address to be changed on the PAPR virtual
      NIC.
    * Add some extra test cases for several machines (this isn't
      strictly in the ppc code, but is most value to ppc)

NOTE: This pull request supersedes ppc-for-2.8-20160906, which had
some problems.  Changes:
  * Dropped BenH's lmw/stmw speedups, which break for
    qemu-system-ppc64 on BE hosts
  * A small fix to Thomas' serial output test to avoid a warning on
    the isapc machine type.
  * Some trivial checkpatch fixes

Note that some of the patches in this series still have large numbers
of checkpatch warnings.  This is because they're moving existing code
that predates most of the checkpatch style conventions.

# gpg: Signature made Wed 07 Sep 2016 07:09:27 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.8-20160907: (64 commits)
  tests: Check serial output of firmware boot of some machines
  tests: Resort check-qtest entries in Makefile.include
  spapr: implement H_CHANGE_LOGICAL_LAN_MAC h_call
  ppc: Improve a few more helper flags
  ppc: Improve the exception helpers flags
  ppc: Improve flags for helpers loading/writing the time facilities
  ppc: Don't generate dead code on unconditional branches
  ppc: Stop dumping state on all exceptions in linux-user
  ppc: Fix catching some segfaults in user mode
  ppc: Fix macio ESCC legacy mapping
  hw/ppc: add a ppc_create_page_sizes_prop() helper routine
  hw/ppc: use error_report instead of fprintf
  ppc: Rename #include'd .c files to .inc.c
  target-ppc: add extswsli[.] instruction
  target-ppc: add vsrv instruction
  target-ppc: add vslv instruction
  target-ppc: add vcmpnez[b,h,w][.] instructions
  target-ppc: add vabsdu[b,h,w] instructions
  target-ppc: add dtstsfi[q] instructions
  target-ppc: implement branch-less divd[o][.]
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-08 11:28:12 +01:00
Peter Maydell
0813cbf913 tests/hd-geo-test: Don't pass NULL to unlink()
The unlink() function doesn't accept a NULL pointer, so
don't pass it one. Spotted by the clang sanitizer.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1470391392-28274-1-git-send-email-peter.maydell@linaro.org
2016-09-08 10:43:58 +01:00
Thomas Huth
d2ab58ffc9 tests: Check serial output of firmware boot of some machines
Some of the machines that we have got a firmware image for write
some output to the serial console while booting up. We can use
this output to make sure that the machine is basically working,
so this adds a test that checks the output of these machines
for some well-known "magic" strings.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:13 +10:00
Thomas Huth
29531542bc tests: Resort check-qtest entries in Makefile.include
The rather random list of check-qtest-xxx entries caused some
confusion in the past, where to use "=" and where to use "+="
(see commits 0ccac16f59 and 1f5c1cfbae
for example).
Sorting the check-qtest-xxx entries by architecure instead and
using some empty lines inbetween should help to ease this
situation a little bit, so that it is hopefully now obvious
that new tests should be added with "+=" instead of "=".
While we are at it, this patch also comments out two of the
"gcov-files-..." lines since the corresponding m48t59-test is
disabled for sparc and sparc64, too.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:13 +10:00
Laurent Vivier
32f5f50dad spapr: implement H_CHANGE_LOGICAL_LAN_MAC h_call
Since kernel v4.0, linux uses H_CHANGE_LOGICAL_LAN_MAC to change lively
the MAC address of an ibmveth interface.

As QEMU doesn't implement this h_call, we can't change anymore the
MAC address of an spapr-vlan interface.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:13 +10:00
Benjamin Herrenschmidt
a007b19b37 ppc: Improve a few more helper flags
Mostly turn "store" type of helpers into TCG_CALL_NO_WG because
they can take exceptions. Also fixup_thrm doesn't read nor write
the tracked environment.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
76a3d2f750 ppc: Improve the exception helpers flags
They generate exceptions, but they don't update the environment

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
d0f6ced17f ppc: Improve flags for helpers loading/writing the time facilities
Those helpers never load from or store to the TCG tracked environment,
not do they generate synchronous exceptions (they might generate an
asynchronous interrupt but that's not an issue here).

So we can make them all use TCG_CALL_NO_RWG

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
accc60c47c ppc: Don't generate dead code on unconditional branches
We are always generating the "else" case of the condition even when
generating an unconditional branch that will never hit it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
4f5d326046 ppc: Stop dumping state on all exceptions in linux-user
Other archs don't do it, some programs catch signals just fine
and those dumps just clutter the output. Keep the dumps for cases
that aren't supposed to happen such as unknown codes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
ba4a8df83f ppc: Fix catching some segfaults in user mode
The usermode "translate" code generates an error code value that
has the "is_write" bit set, which causes our switch/case to miss
and display "Invalid segfault errno" and a spurrious second state
dump. Fix it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
dd2fa4f72d ppc: Fix macio ESCC legacy mapping
The current mapping, while correct for the base ports (which is all the
driver uses these days), is wrong for the extended registers.

I suspect the bugs come from incorrect tables in the CHRP IO Ref document,
I have verified the new values here match Apple's MacTech.pdf.

Note: Nothing that I know of actually uses these registers so it's not a
huge deal, but this patch has the added advantage of adding comments to
document what the registers are.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Cédric Le Goater
3654fa95bc hw/ppc: add a ppc_create_page_sizes_prop() helper routine
The exact same routine will be used in PowerNV.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Cédric Le Goater
ce9863b797 hw/ppc: use error_report instead of fprintf
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Benjamin Herrenschmidt
15848410af ppc: Rename #include'd .c files to .inc.c
Also while at it, group the #include statements in translate.c

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Nikunj A Dadhania
787bbe3711 target-ppc: add extswsli[.] instruction
extswsli : Extend Sign Word & Shift Left Immediate

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Vivek Andrew Sha
4004c1dbca target-ppc: add vsrv instruction
Adds Vector Shift Right Variable instruction.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
[ reverse the order of computation to avoid temporary array ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:12 +10:00
Vivek Andrew Sha
5644a17567 target-ppc: add vslv instruction
vslv: Vector Shift Left Variable

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Swapnil Bokade
f7cc8466f1 target-ppc: add vcmpnez[b,h,w][.] instructions
Adds following instructions:

vcmpnezb[.]: Vector Compare Not Equal or Zero Byte
vcmpnezh[.]: Vector Compare Not Equal or Zero Halfword
vcmpnezw[.]: Vector Compare Not Equal or Zero Word

Signed-off-by: Swapnil Bokade <bokadeswapnil@gmail.com>
[ collapse switch case ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Sandipan Das
377070595a target-ppc: add vabsdu[b,h,w] instructions
Adds following instructions:

vabsdub: Vector Absolute Difference Unsigned Byte
vabsduh: Vector Absolute Difference Unsigned Halfword
vabsduw: Vector Absolute Difference Unsigned Word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ use ISA300 define. Drop etype ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Sandipan Das
217f6b8805 target-ppc: add dtstsfi[q] instructions
DFP Test Significance Immediate [Quad]

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Nikunj A Dadhania
4110b586de target-ppc: implement branch-less divd[o][.]
Similar to divw, implement branch-less divd.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Nikunj A Dadhania
b07c32dc4b target-ppc: implement branch-less divw[o][.]
While implementing modulo instructions figured out that the
implementation uses many branches. Change the logic to achieve the
branch-less code. Undefined value is set to dividend in case of invalid
input.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
5817355ed0 ppc: load/store multiple and string insns don't do LE
Just generate an alignment interrupt

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
65f2475f1f ppc: Use a helper to generate "LE unsupported" alignment interrupts
Some operations aren't allowed in LE mode, use a helper rather than
open coding the exception generation.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
5f2a625452 ppc: Don't set access_type on all load/stores on hash64
We don't use it so let's not generate the updates.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
fbc3b39b39 ppc: Fix CFAR updates
We were one instruction off

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
c9f82d013b ppc: Speed up dcbz
Use tlb_vaddr_to_host to do a fast path single translate for
the whole cache line. Also make the reservation check match
the entire range.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
22b56ee568 ppc: Handle unconditional (always/never) traps at translation time
We don't need to call a helper for trap always and trap never
which are used by Linux under some circumstances.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
--

v2. Don't generate the helper call when trapping always
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:11 +10:00
Benjamin Herrenschmidt
3433b732a4 ppc: Make alignment exceptions suck less
The current alignment exception generation tries to load the opcode
to put in DSISR from a context where a cpu_ldl_code() is really not
a good idea. It might fault and longjmp out and that's not something
we want happening here.

Instead, pass the releavant opcode bits via the error_code.

There are a couple of cases of alignment interrupts that won't set
anything, the ones coming from access to direct store segments, but
that doesn't happen in practice, nobody used direct store segments
and they are gone from newer chips.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
b00a3b3648 ppc: Don't update NIP in dcbz and lscbx
Instead, pass GETPC() result to the corresponding helpers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
573708e329 ppc: Don't update NIP if not taking alignment exceptions
Move the NIP update to after the conditional branch so that we
don't do it if we aren't going to take the alignment exception

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
72073dcce0 ppc: Don't update NIP on conditional trap instructions
This is no longer necessary as the helpers will properly retrieve
the return address when needed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
8c8966e218 ppc: Don't update NIP BookE 2.06 tlbwe
This is no longer necessary as the helpers will properly retrieve
the return address when needed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
57a2988b6f ppc: Don't update NIP in facility unavailable interrupts
This is no longer necessary as the helpers will properly retrieve
the return address when needed. Also remove gen_update_current_nip()
which didn't seem to make much sense to me.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
a13f0a9bc4 ppc: Don't update NIP in DCR access routines
This is no longer necessary as the helpers will properly retrieve
the return address when needed

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
0f72b7c682 ppc: Fix source NIP on SLB related interrupts
We need to pass it to the raise helper since we don't update it
before the calls.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:10 +10:00
Benjamin Herrenschmidt
bd6fefe71c ppc: Make tlb_fill() use new exception helper
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:40:03 +10:00
Benjamin Herrenschmidt
af6d376ea1 ppc: Don't update NIP in lmw/stmw/icbi
Instead, pass GETPC() result to the corresponding helpers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:38:00 +10:00
Benjamin Herrenschmidt
e41029b378 ppc: Don't update NIP in lswi/lswx/stswi/stswx
Instead, pass GETPC() result to the corresponding helpers. This
requires a bit of fiddling to get the PC (hopefully) right in
the case where we generate a program check, though the hacks there
are temporary, a subsequent patch will clean this all up by always
having the nip already set to the right instruction when taking
the fault.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[dwg: Fix trivial checkpatch warning]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:37:48 +10:00
Benjamin Herrenschmidt
1b7d17cae4 ppc: FP exceptions are always precise
We don't implement imprecise FP exceptions and using store_current
which sets SRR1 to the *previous* instruction never makes sense
for these. So let's be truthful and make them precise, which is
allowed by the architecture.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:47 +10:00
Benjamin Herrenschmidt
ef24726e48 ppc: Don't update the NIP in floating point generated code
This is no longer necessary as the helpers will properly retrieve
the return address.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:47 +10:00
Benjamin Herrenschmidt
44f35bd1ac ppc: Make float_check_status() pass the return address
Instead of relying on NIP having been updated already.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[dwg: Fold in fix to mark function always_inline]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:47 +10:00
Benjamin Herrenschmidt
a93ecff935 ppc: Make float_invalid_op_excp() pass the return address
Instead of relying on NIP having been updated already

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
f63fbc00d4 ppc: Rename fload_invalid_op_excp to float_invalid_op_excp
No other change

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
3014427af5 ppc: Move VSX ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
0304af897b ppc: Move VMX ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
8b25cdd371 ppc: Move DFP ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
4083de6b53 ppc: Move embedded spe ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
f96511215d ppc: Move classic fp ops out of translate.c
Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Benjamin Herrenschmidt
db789c6cd3 ppc: Provide basic raise_exception_* functions
Instead of using the same helpers called from translate.c, let's have
a bunch of functions that take the various argument combinations,
especially the retaddr which will be needed in subsequent patches,
and leave the helpers to be just that, helpers for translate.c

We don't yet convert all users, we'll go through them in subsequent
patches.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
--

v2. Fix raise_exception_ra() to properly pass raddr
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:46 +10:00
Nikunj A Dadhania
323ad19bcc target-ppc: introduce opc4 for Expanded Opcode
ISA 3.0 has introduced EO - Expanded Opcode. Introduce third level
indirect opcode table and corresponding parsing routines.

EO (11:12) Expanded opcode field
Formats: XX1

EO (11:15) Expanded opcode field
Formats: VX, X, XX2

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
[dwg: Trivial checkpatch fixup]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 12:33:33 +10:00
Nikunj A Dadhania
5f29cc8292 target-ppc: add maddhd and maddhdu instruction
maddhd: Multiply-Add High Doubleword
maddhdu: Multiply-Add High Doubleword Unsigned

Above two instruction are dual form and differ by 1 bit
(31st bit)

Multiplies two 64-bit registers (RA * RB), adds third register(RC) to
the result(quadword) and returns the higher dword in the target
register(RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
aeeb044c7b target-ppc: add maddld instruction
maddld: Multiply-Add Low Doubleword

Multiplies two 64-bit registers (RA * RB), adds third register(RC) to
the result(quadword) and returns the lower dword in the target
register(RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Vivek Andrew Sha
dc2ee038da target-ppc: add setb instruction
The CR number is provided in the opcode as - BFA (11:13)

Returns:
  -1 if bit 0 of CR field is set
   1 if bit 1 of CR field is set
   0 otherwise.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
[ reworded commit, used 32bit ops as crf is 32bits ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
082ce33005 target-ppc: add cmpeqb instruction
Search a byte in the stream of 8bytes provided in the register

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
b35344e4a0 target-ppc: add cnttzw[.] instruction
Add ISA3.0: Count trailing zeros word instruction.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Sandipan Das
e91d95b277 target-ppc: add cnttzd[.] instruction
Add ISA3.0 Count trailing zeros double word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ added ISA300 flag ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
063cf14fe3 target-ppc: add modulo dword operations
Adding following instructions for ISA3.0 support

modud: Modulo Unsigned Dword
modsd: Modulo Signed Dword

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
af2c66200e target-ppc: add modulo word operations
Adding following instructions:

moduw: Modulo Unsigned Word
modsw: Modulo Signed Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
f2442ef93c target-ppc: add cmprb instruction
ISA 3.0 Compare Ranged Byte instruction useful for
isupper/islower/isaplha kind of operation.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
c5b2b9ce12 target-ppc: adding addpcis instruction
ISA 3.0 instruction for adding immediate value shifted with next
instruction address and return the result in the target register.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Nikunj A Dadhania
eb640b13a3 target-ppc: Introduce POWER ISA 3.0 flag
This flag will be used for POWER9 instructions.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Aneesh Kumar K.V
706d64675a target-ppc: Introduce Power9 family
The patch adds CPU PVR definition for POWER9 and enables QEMU to launch
guests/linux-user in TCG mode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[ Added POWER9 alias, POWER9 SPAPR core and dropped MMU defines ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
[dwg: Dropped sPAPR core type again for now]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Cédric Le Goater
7804c353a9 hw/ppc: include fdt helper routine in a common file
spapr_pci would also be a good candidate but the macro _FDT is
slightly different. It returns and does not exit.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Greg Kurz
1b1746a436 xics_kvm: drop extra checking of kernel_xics_fd
We abort a few lines above if kernel_xics_fd == -1.

This is only code cleanup.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-09-07 09:52:14 +10:00
Peter Maydell
7faae0b36e Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160906-1' into staging
target-arm queue:
 * fix incorrect LPAE bit in FSR for alignment faults
 * ACPI: fix the AML ID format for CPU devices to work for
   large numbers of CPUs
 * ast2400: add memory controller device model
 * m25p80: fix the vmstate structure name (migration break)

# gpg: Signature made Tue 06 Sep 2016 20:02:28 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160906-1:
  block: m25p80: Fix vmstate structure name
  ARM: ACPI: fix the AML ID format for CPU devices
  target-arm: Fix lpae bit in FSR on an alignment fault
  ast2400: add a memory controller device model

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 20:03:58 +01:00
Marcin Krzeminski
c827c06a4d block: m25p80: Fix vmstate structure name
Correct bad name of the vmstate structure. Since this breaks
compatibility also update vmstate version back to 0 and make
all fields independent of the VMState version.

Signed-off-by: Marcin Krzeminski <marcin.krzeminski@nokia.com>
Acked-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 1473146346-27337-1-git-send-email-marcin.krzeminski@nokia.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:18 +01:00
Wei Huang
f460be435f ARM: ACPI: fix the AML ID format for CPU devices
Current QEMU will stall guest VM booting under ACPI mode when vcpu count
is >= 12. Analyzing the booting log, it turns out that DSDT table can't
be loaded correctly due to "Invalid character(s) in name (0x62303043),
repaired: [C00*]". This is because existing QEMU uses a lower case AML
ID for CPU devices (e.g. C000, C001, ..., C00a, C00b). The ACPI code
inside guest VM detects this lower case character as an invalid character
(see acpi_ut_valid_acpi_char() in drivers/acpi/acpica/utstring.c file)
and converts it to "*". This causes duplicated IDs (i.e. "C00a" ==>"C00*"
and "C00b" ==> "C00*"). So ACPI refuses to load the table.

This patch fixes the problem by changing the format with a upper case
character. It matches the CPU ID formats used in other parts of QEMU
code.

Reported-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Wei Huang <wei@redhat.com>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1472852809-23042-1-git-send-email-wei@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:17 +01:00
Sergey Sorokin
e0fe723c24 target-arm: Fix lpae bit in FSR on an alignment fault
If an alignment fault occurred and target EL is using AArch32,
then DFSR/IFSR bit LPAE[9] must be set correctly.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
Message-id: 1471283293-169850-1-git-send-email-afarallax@yandex.ru
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:17 +01:00
Cédric Le Goater
c2da8a8b90 ast2400: add a memory controller device model
The uboot in the previous release of the SDK was using a hardcoded
value for memory size. This is not true anymore, the value is now
retrieved from the memory controller.

Below is a model for this device, only supporting unlock and
configuration. Without it, we endup running a guest with 64MB, which
is a bit low nowdays. It uses a 'silicon-rev' property and ram_size to
build a default value. Some bits should be linked to SCU strapping
registers but it seems a bit complex to add for the current need.

The model is ready for the AST2500 SOC.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 19:52:17 +01:00
Peter Maydell
2926375cff Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 06 Sep 2016 11:38:01 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (36 commits)
  block: Allow node name for 'qemu-io' HMP command
  qemu-iotests: Log QMP traffic in debug mode
  block jobs: Improve error message for missing job ID
  coroutine: Assert that no locks are held on termination
  coroutine: Let CoMutex remember who holds it
  qcow2: fix iovec size at qcow2_co_pwritev_compressed
  test-coroutine: Fix coroutine pool corruption
  qemu-iotests: add vmdk for test backup compression in 055
  qemu-iotests: test backup compression in 055
  blockdev-backup: added support for data compression
  drive-backup: added support for data compression
  block: simplify blockdev-backup
  block: simplify drive-backup
  block/io: turn on dirty_bitmaps for the compressed writes
  block: remove BlockDriver.bdrv_write_compressed
  qcow: cleanup qcow_co_pwritev_compressed to avoid the recursion
  qcow: add qcow_co_pwritev_compressed
  vmdk: add vmdk_co_pwritev_compressed
  qcow2: cleanup qcow2_co_pwritev_compressed to avoid the recursion
  qcow2: add qcow2_co_pwritev_compressed
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 17:18:17 +01:00
Peter Maydell
f9ae6bcf1d Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160906-v2' into staging
First (big) chunk of s390x updates:
- cpumodel support for s390x
- various fixes and improvements

# gpg: Signature made Tue 06 Sep 2016 16:09:53 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160906-v2: (38 commits)
  s390x/cpumodel: implement QMP interface "query-cpu-model-baseline"
  s390x/cpumodel: implement QMP interface "query-cpu-model-comparison"
  s390x/cpumodel: implement QMP interface "query-cpu-model-expansion"
  qmp: add QMP interface "query-cpu-model-baseline"
  qmp: add QMP interface "query-cpu-model-comparison"
  qmp: add QMP interface "query-cpu-model-expansion"
  s390x/kvm: don't enable key wrapping if msa3 is disabled
  s390x/kvm: let the CPU model control CMM(A)
  s390x/kvm: disable host model for problematic compat machines
  s390x/kvm: implement CPU model support
  s390x/kvm: allow runtime-instrumentation for "none" machine
  s390x/sclp: propagate hmfai
  s390x/sclp: propagate the mha via sclp
  s390x/sclp: propagate the ibc val (lowest and unblocked ibc)
  s390x/sclp: indicate sclp features
  s390x/sclp: introduce sclp feature blocks
  s390x/sclp: factor out preparation of cpu entries
  s390x/cpumodel: check and apply the CPU model
  s390x/cpumodel: let the CPU model handle feature checks
  s390x/cpumodel: expose features and feature groups as properties
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 16:17:19 +01:00
David Hildenbrand
f1a47d08ef s390x/cpumodel: implement QMP interface "query-cpu-model-baseline"
Let's implement that interface by reusing our conversion code and
lookup code for CPU definitions.

In order to find a compatible CPU model, we first detect the maximum
possible CPU generation and then try to find a maximum model, satisfying
all base features (not exceeding the maximum generation).

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-31-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
4e82ef0502 s390x/cpumodel: implement QMP interface "query-cpu-model-comparison"
Let's implement that interface by reusing our convertion code implemented
for expansion.

We use CPU generations and CPU features to calculate the result. This
means, that a zEC12 cannot simply be converted into a z13 by stripping
of features. This is required, as other magic values (e.g. maximum
address sizes) belong to a CPU generation and cannot simply be
emulated by an older generation.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-30-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
137974cea3 s390x/cpumodel: implement QMP interface "query-cpu-model-expansion"
In order to expand CPU models, we create temporary cpus that handle the
feature/group parsing. Only CPU feature properties are expanded.

When converting the data structure back, we always fall back to the
static base CPU model, which is by definition migration-safe.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-29-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
b18b604334 qmp: add QMP interface "query-cpu-model-baseline"
Let's provide a standardized interface to baseline two CPU models, to
create a third, compatible one. This is especially helpful when two
CPU models are not identical, but a CPU model is required that is
guaranteed to run under both configurations, where the original models run.

"query-cpu-model-baseline" takes two CPU models and returns a third,
compatible model. The result will always be a static CPU model.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-28-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
0031e0d683 qmp: add QMP interface "query-cpu-model-comparison"
Let's provide a standardized interface to compare two CPU models.
"query-cpu-model-compare" takes two models and returns  how they compare
in a specific configuration.

The result will give guarantees about runnability. E.g. if a CPU model A
is a subset of CPU model B, model A is guaranteed to run in configurations
where model B runs, but not the other way around (might or might not run).

Usually, CPU features or CPU generations are used to calculate the result.
If a model is not guaranteed to run in a certain environment (e.g.
incompatible), a  compatible one can be created by "baselining" both models
(follow up patch).

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-27-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
e09484efbc qmp: add QMP interface "query-cpu-model-expansion"
Let's provide a standardized interface to expand CPU models. This interface
can be used by tooling to get details about a specific CPU model in a
certain configuration, e.g. about the "host" model.

To take care of all architectures, two detail levels for an expansion
are introduced. Certain architectures might not support all detail levels.
While "full" will expand and indicate all relevant properties/features
of a CPU model, "static" expands to a static base CPU model, that will
never change between QEMU versions and therefore have the same features
when used under different compatibility machines.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-26-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
c85d21c73b s390x/kvm: don't enable key wrapping if msa3 is disabled
As the CPU model now controls msa3, trying to set wrapping keys without
msa3 being around/enable in the kernel will produce misleading errors.

So let's simply not configure key wrapping if msa3 is not enabled and
make compat machines with disabled CPU model work correctly.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-25-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
07059effd1 s390x/kvm: let the CPU model control CMM(A)
Starting with recent kernels, if the cmma attributes are available, we
actually have hardware support. Enabling CMMA then means providing the
guest VCPU with CMM, therefore enabling its CMM facility.

Let's not blindly enable CMM anymore but let's control it using CPU models.
For disabled CPU models, CMMA will continue to always get enabled.

Also enable it in the applicable default models.

Please note that CMM doesn't work with hugetlbfs, therefore we will
warn the user and keep it disabled. Migrating from/to a hugetlbfs
configuration works, as it will be disabled on both sides.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-24-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:51 +02:00
David Hildenbrand
34821036cd s390x/kvm: disable host model for problematic compat machines
Compatibility machines that touch runtime-instrumentation should not
be used with the CPU model. Otherwise the host model will look different,
depending on the QEMU machine QEMU has been started with.

So let's simply disable the host model for existing compatibility machines
that all disable ri. This, in return, disables the CPU model for these
compat machines completely.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-23-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
3b84c25cc7 s390x/kvm: implement CPU model support
Let's implement our two hooks so we can support CPU models.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-22-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
392529cb77 s390x/kvm: allow runtime-instrumentation for "none" machine
To be able to query the correct host model for the "none" machine,
let's allow runtime-instrumentation for that machine.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-21-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
a366930780 s390x/sclp: propagate hmfai
hmfai is provided on CPU models >= z196. Let's propagate it properly.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-19-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
3fad3252a3 s390x/sclp: propagate the mha via sclp
The mha is provided in the CPU model, so get any CPU and extract the value.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-18-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
059be520d5 s390x/sclp: propagate the ibc val (lowest and unblocked ibc)
If we have a lowest ibc, we can indicate the ibc to the guest.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-17-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
4dd4200ee7 s390x/sclp: indicate sclp features
We have three different blocks in the SCLP read-SCP information response
that indicate sclp features. Let's prepare propagation.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-16-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
1c07e01b61 s390x/sclp: introduce sclp feature blocks
The sclp "read cpu info" and "read scp info" commands can include
features for the cpu info and configuration characteristics (extended),
decribing some advanced features available in the configuration.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-15-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
026546e6c3 s390x/sclp: factor out preparation of cpu entries
Let's factor out the common code of "read cpu info" and "read scp
info". This will make the introduction of new cpu entry fields easier.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-14-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
80560137cf s390x/cpumodel: check and apply the CPU model
We have to test if a configured CPU model is runnable in the current
configuration, and if not report why that is the case. This is done by
comparing it to the maximum supported model (host for KVM or z900 for TCG).
Also, we want to do some base sanity checking for a configured CPU model.

We'll cache the maximum model and the applied model (for performance
reasons and because KVM can only be configured before any VCPU is created).

For unavailable "host" model, we have to make sure that we inform KVM,
so it can do some compatibility stuff (enable CMMA later on to be precise).

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-13-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
7c72ac49ae s390x/cpumodel: let the CPU model handle feature checks
If we have certain features enabled, we have to migrate additional state
(e.g. vector registers or runtime-instrumentation registers). Let the
CPU model control that unless we have no "host" CPU model in the KVM
case. This will later on be the case for compatibility machines, so
migration from QEMU versions without the CPU model will still work.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-12-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
0754f60429 s390x/cpumodel: expose features and feature groups as properties
Let's add all features and feature groups as properties to all CPU models.
If the "host" CPU model is unknown, we can neither query nor change
features. KVM will just continue to work like it did until now.

We will not allow to enable features that were not part of the original
CPU model, because that could collide with the IBC in KVM.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-11-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
ad5afd07b6 s390x/cpumodel: store the CPU model in the CPU instance
A CPU model consists of a CPU definition, to which delta changes are
applied - features added or removed (e.g. z13-base,vx=on). In addition,
certain properties (e.g. cpu id) can later on change during migration
but belong into the CPU model. This data will later be filled from the
host model in the KVM case.

Therefore, store the configured CPU model inside the CPU instance, so
we can later on perform delta changes using properties.

For the "qemu" model, we emulate in TCG a z900. "host" will be
uninitialized (cpu->model == NULL) unless we have CPU model support in KVM
later on. The other models are all initialized from their definitions.
Only the "host" model can have a cpu->model == NULL.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-10-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:50 +02:00
David Hildenbrand
6c064de1e0 s390x/cpumodel: register defined CPU models as subclasses
This patch adds the CPU model definitions that are known on s390x -
like z900, zBC12 or z13. For each definition, introduce two CPU models:

1. Base model (e.g. z13-base): Minimum feature set we expect to be around
   on all z13 systems. These models are migration-safe and will never
   change.
2. Flexible models (e.g. z13): Models that can change between QEMU versions
   and will be extended over time as we implement further features that
   are already part of such a model in real hardware of certain
   configurations.

We want to work on features using ordinary bitmap operations, however we
can't initialize a bitmap statically (unsigned long[] ...). Therefore we
store the generated feature lists in separate arrays and convert them to
proper bitmaps before registering all our CPU model classes.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-9-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
8b3d6cb1fa s390x/cpumodel: introduce CPU feature group definitions
Let's use the generated groups to create feature group representations for
the user. These groups can later be used to enable/disable multiple
features in one shot and will be used to reduce the amount of reported
features to the user if all subfeatures are in place.

We want to work on features using ordinary bitmap operations, however we
can't initialize a bitmap statically (unsigned long[] ...). Therefore
we store the generated feature lists in separate arrays and convert
them to a proper bitmaps before they will ever be used.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-8-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
90229ebbad s390x/cpumodel: generate CPU feature group lists
Feature groups will be very helpful to reduce the amount of features
typically available in sane configurations. E.g. the MSA facilities
introduced loads of subfunctions, which could - in theory - go away
in the future, but we want to avoid reporting hundrets of features to
the user if usually all of them are in place.

Groups only contain features that were introduced in one shot, not just
random features. Therefore, groups can never change. This is an important
property regarding migration.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-7-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
Michael Mueller
dced7eec3c s390x/cpumodel: generate CPU feature lists for CPU models
This patch introduces the helper "gen-features" which allows to generate
feature list definitions at compile time. Its flexibility is better and the
error-proneness is lower when compared to static programming time added
statements.

The helper includes "target-s390x/cpu_features.h" to be able to use named
facility bits instead of numbers. The generated defines will be used for
the definition of CPU models.

We generate feature lists for each HW generation and GA for EC models. BC
models are always based on a EC version and have no separate definitions.

Base features: Features we expect to be always available in sane setups.
Migration safe - will never change. Can be seen as "minimum features
required for a CPU model".

Default features: Features we expect to be stable and around in latest
setups (e.g. having KVM support) - not migration safe.

Max features: All supported features that are theoretically allowed for a
CPU model. Exceeding these features could otherwise produce problems with
IBC (instruction blocking controls) in KVM.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
[generate base, default and models. renaming and cleanup]
Message-Id: <20160905085244.99980-6-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
Michael Mueller
7824174462 s390x/cpumodel: introduce CPU features
The patch introduces s390x CPU features (most of them refered to as
facilities) along with their discription and some functions that will be
helpful when working with the features later on.

Please note that we don't introduce all known CPU features, only the
ones currently supported by KVM + QEMU. We don't want to enable later
on blindly any facilities, for which we don't know yet if we need QEMU
support to properly support them (e.g. migrate additional state when
active). We can update QEMU later on.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
[reworked to include non-stfle features, added definitions]
Message-Id: <20160905085244.99980-5-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
6efadc9050 s390x/cpumodel: expose CPU class properties
Let's expose the description and migration safety and whether a definition
is static, as class properties, this can be helpful in the future.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-4-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
David Hildenbrand
41868f846d s390x/cpumodel: "host" and "qemu" as CPU subclasses
This patch introduces two CPU models, "host" and "qemu".
"qemu" is used as default when running under TCG. "host" is used
as default when running under KVM. "host" cannot be used without KVM.
"host" is not migration-safe. They both inherit from the base s390x CPU,
which is turned into an abstract class.

This patch also changes CPU creation to take care of the passed CPU string
and reuses common code parse_features() function for that purpose. Unknown
CPU definitions are now reported. The "-cpu ?" and "query-cpu-definition"
commands are changed to list all CPU subclasses automatically, including
migration-safety and whether static.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-3-dahi@linux.vnet.ibm.com>
[CH: fix up self-assignments in s390_cpu_list, as spotted by clang]
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-06 17:06:49 +02:00
Peter Maydell
085c915019 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 and memory backends queue, 2016-09-05

This includes a few features that were submitted just after hard
freeze, and a bug fix for memory backend initialization ordering.

# gpg: Signature made Mon 05 Sep 2016 20:50:14 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  vl: Delay initialization of memory backends
  vhost-user-test: Use libqos instead of pxe-virtio.rom
  target-i386: Add more Intel AVX-512 instructions support
  exec: Ensure the only one cpu_index allocation method is used

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 13:33:17 +01:00
Peter Maydell
30e7d092b2 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 05 Sep 2016 20:41:04 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace: Avoid implicit bool->integer conversions
  trace: Remove 'trace_events_dstate_init'
  trace: add syslog tracing backend

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 12:41:24 +01:00
Peter Maydell
1fd66154fd Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160905' into staging
Xen 2016/09/05

# gpg: Signature made Mon 05 Sep 2016 19:59:47 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20160905:
  xen: use native disk xenbus protocol if possible

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-06 11:43:18 +01:00
Eduardo Habkost
6546d0dba6 vl: Delay initialization of memory backends
Initialization of memory backends may take a while when
prealloc=yes is used, depending on their size. Initializing
memory backends before chardevs may delay the creation of monitor
sockets, and trigger timeouts on management software that waits
until the monitor socket is created by QEMU. See, for example,
the bug report at:
https://bugzilla.redhat.com/show_bug.cgi?id=1371211

In addition to that, allocating memory before calling
configure_accelerator() breaks the tcg_enabled() checks at
memory_region_init_*().

This patch fixes those problems by adding "memory-backend-*"
classes to the delayed-initialization list.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:03:47 -03:00
Eduardo Habkost
cdafe92961 vhost-user-test: Use libqos instead of pxe-virtio.rom
vhost-user-test relies on iPXE just to initialize the virtio-net
device, and doesn't do any actual packet tx/rx testing.

In addition to that, the test relies on TCG, which is
imcompatible with vhost. The test only worked by accident: a bug
the memory backend initialization made memory regions not have
the DIRTY_MEMORY_CODE bit set in dirty_log_mask.

This changes vhost-user-test to initialize the virtio-net device
using libqos, and not use TCG nor pxe-virtio.rom.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:03:47 -03:00
Luwei Kang
cc728d1493 target-i386: Add more Intel AVX-512 instructions support
Add more AVX512 feature bits, include AVX512DQ, AVX512IFMA,
AVX512BW, AVX512VL, AVX512VBMI. Its spec can be found at:
https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:01:55 -03:00
Igor Mammedov
630eb0faf4 exec: Ensure the only one cpu_index allocation method is used
Make sure that cpu_index auto allocation isn't used in
combination with manual cpu_index assignment. And
dissallow out of order cpu removal if auto allocation
is in use.

Target that wishes to support out of order unplug should
switch to manual cpu_index assignment. Following patch
could be used as an example:
 (pc: init CPUState->cpu_index with index in possible_cpus[]))

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-05 16:01:55 -03:00
Lluís Vilanova
8eb1b9db55 trace: Avoid implicit bool->integer conversions
An explicit if/else is clearer than arithmetic assuming #true is 1,
while the compiler should be able to generate just as optimal code.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 147194273830.26836.5875729707953474838.stgit@fimbulvetr.bsc.es
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 13:47:02 -04:00
Lluís Vilanova
a4d50b1d2a trace: Remove 'trace_events_dstate_init'
Removes the event state array used for early initialization. Since only
events with the "vcpu" property need a late initialization fixup,
threats their initialization specially.

Assumes that the user won't touch the state of "vcpu" events between
early and late initialization (e.g., through QMP).

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 147194273191.26836.14423079546263831356.stgit@fimbulvetr.bsc.es
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 13:47:01 -04:00
Paul Durrant
0a85241756 trace: add syslog tracing backend
This patch adds a tracing backend which sends output using syslog().
The syslog backend is limited to POSIX compliant systems.

openlog() is called with facility set to LOG_DAEMON, with the LOG_PID
option. Trace events are logged at level LOG_INFO.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Message-id: 1470318254-29989-1-git-send-email-paul.durrant@citrix.com
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 13:47:01 -04:00
Kevin Wolf
e7f98f2f92 block: Allow node name for 'qemu-io' HMP command
When using a node name, create a temporary BlockBackend that is used to
run the qemu-io command.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
c0088d79a7 qemu-iotests: Log QMP traffic in debug mode
Python tests are already annoying enough to debug. With QMP traffic
available it's a little bit easier at least.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
1562047c89 block jobs: Improve error message for missing job ID
If a block job is started with a node name rather than a device name and
no explicit job ID is passed, it was reported that '' isn't a
well-formed ID. Which is correct, but we can make the message a little
bit nicer.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
1b7f01d966 coroutine: Assert that no locks are held on termination
A coroutine that takes a lock must also release it again. If the
coroutine terminates without having released all its locks, it's buggy
and we'll probably run into a deadlock sooner or later. Make sure that
we don't get such cases.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
0e438cdc93 coroutine: Let CoMutex remember who holds it
In cases of deadlocks, knowing who holds a given CoMutex is really
helpful for debugging. Keeping the information around doesn't cost much
and allows us to add another assertion to keep the code correct, so
let's just add it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
8b2bd09338 qcow2: fix iovec size at qcow2_co_pwritev_compressed
Use bytes as the size would be more exact than s->cluster_size.  Although
qemu_iovec_to_buf() will not allow to go beyond the qiov.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Kevin Wolf
980e66216f test-coroutine: Fix coroutine pool corruption
The test case overwrites the Coroutine object with 0xff as a way to
assert that the coroutine isn't used any more. However, this means that
the coroutine pool now contains a corrupted object and later test cases
may get this corrupted object and crash.

This patch saves the real content of the object and restores it after
completing the test. The only use of the coroutine pool between those
two points is the deletion of co2. As this only means an insertion at
the head of an SLIST (release_pool or alloc_pool), it doesn't access the
invalid list pointers that co1 has during this period.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
00198ecc77 qemu-iotests: add vmdk for test backup compression in 055
The vmdk format has support for compression, it would be fine to add it for
the test backup compression

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
e1b5c51f4c qemu-iotests: test backup compression in 055
Added cases to check the backup compression out of qcow2, raw in qcow2
on drive-backup and blockdev-backup.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
3b7b123659 blockdev-backup: added support for data compression
The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
13b9414b57 drive-backup: added support for data compression
The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

The patch adds a flag to the qmp/hmp drive-backup command which enables
block compression. Compression should be implemented in the format driver
to enable this feature.

There are some limitations of the format driver to allow compressed writes.
We can write data only once. Though for backup this is perfectly fine.
These limitations are maintained by the driver and the error will be
reported if we are doing something wrong.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
dc7a4a9ed1 block: simplify blockdev-backup
Now that we can support boxed commands, use it to greatly reduce the
number of parameters (and likelihood of getting out of sync) when
adjusting blockdev-backup parameters.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
81206a8987 block: simplify drive-backup
Now that we can support boxed commands, use it to greatly reduce the
number of parameters (and likelihood of getting out of sync) when
adjusting drive-backup parameters.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
3ea1a09111 block/io: turn on dirty_bitmaps for the compressed writes
Previously was added the assert:

  commit 1755da16e3
  Author: Paolo Bonzini <pbonzini@redhat.com>
  Date:   Thu Oct 18 16:49:18 2012 +0200
  block: introduce new dirty bitmap functionality

Now the compressed write is always in coroutine and setting the bits is
done after the write, so that we can return the dirty_bitmaps for the
compressed writes.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
35fadca80e block: remove BlockDriver.bdrv_write_compressed
There are no block drivers left that implement the old
.bdrv_write_compressed interface, so it can be removed. Also now we have
no need to use the bdrv_pwrite_compressed function and we can remove it
entirely.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
655923df4b qcow: cleanup qcow_co_pwritev_compressed to avoid the recursion
Now that the function uses a vector instead of a buffer, there is no
need to use recursive code.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
f2b95a1231 qcow: add qcow_co_pwritev_compressed
Added implementation of the qcow_co_pwritev_compressed function that
will allow us to safely use compressed writes for the qcow from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
b2c622d365 vmdk: add vmdk_co_pwritev_compressed
Added implementation of the vmdk_co_pwritev_compressed function that
will allow us to safely use compressed writes for the vmdk from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
a2c0ca6f55 qcow2: cleanup qcow2_co_pwritev_compressed to avoid the recursion
Now that the function uses a vector instead of a buffer, there is no
need to use recursive code.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
fcccefc57f qcow2: add qcow2_co_pwritev_compressed
Added implementation of the qcow2_co_pwritev_compressed function that
will allow us to safely use compressed writes for the qcow2 from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
29a298af9d block/io: reuse bdrv_co_pwritev() for write compressed
For bdrv_pwrite_compressed() it looks like most of the code creating
coroutine is duplicated in bdrv_prwv_co(). So we can just add a flag
(BDRV_REQ_WRITE_COMPRESSED) and use bdrv_prwv_co() as a generic one.
In the end we get coroutine oriented function for write compressed by using
bdrv_co_pwritev/blk_co_pwritev with BDRV_REQ_WRITE_COMPRESSED flag.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:48 +02:00
Pavel Butsykin
751e2f0698 block: Convert bdrv_pwrite_compressed() to BdrvChild
Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:47 +02:00
Pavel Butsykin
fe5c1355e7 block: switch blk_write_compressed() to byte-based interface
This is a preparatory patch, which continues the general trend of the
transition to the byte-based interfaces. bdrv_check_request() and
blk_check_request() are no longer used, thus we can remove them.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
094138d09e nbd-server: Allow node name for nbd-server-add
There is no reason why an NBD server couldn't be started for any node,
even if it's not on the top level. This converts nbd-server-add to
accept a node-name.

Note that there is a semantic difference between using a BlockBackend
name and the node name of its root: In the former case, the NBD server
is closed on eject; in the latter case, the NBD server doesn't drop its
reference and keeps the image file open this way.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
cd7fca952c nbd-server: Use a separate BlockBackend
The builtin NBD server uses its own BlockBackend now instead of reusing
the monitor/guest device one.

This means that it has its own writethrough setting now. The builtin
NBD server always uses writeback caching now regardless of whether the
guest device has WCE enabled. qemu-nbd respects the cache mode given on
the command line.

We still need to keep a reference to the monitor BB because we put an
eject notifier on it, but we don't use it for any I/O.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
0524e93a3f block: Accept node-name for drive-mirror
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
drive-mirror to accept a node-name without lifting the restriction that
we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
b7e4fa2242 block: Accept node-name for drive-backup
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
drive-backup and the corresponding transaction action to accept a
node-name without lifting the restriction that we're operating at a root
node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
7b5dca3f02 block: Accept node-name for change-backing-file
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
change-backing-file to accept a node-name without lifting the
restriction that we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
75dfd402a7 block: Accept node-name for blockdev-snapshot-internal-sync
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-snapshot-internal-sync to accept a node-name without lifting
the restriction that we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
2dfb4c033f block: Accept node-name for blockdev-snapshot-delete-internal-sync
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-snapshot-delete-internal-sync to accept a node-name without
lifting the restriction that we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
07eec65272 block: Accept node-name for blockdev-mirror
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-mirror to accept a node-name without lifting the restriction
that we're operating at a root node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
cef34eebf3 block: Accept node-name for blockdev-backup
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
blockdev-backup and the corresponding transaction action to accept a
node-name without lifting the restriction that we're operating at a root
node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
1d13b167fd block: Accept node-name for block-commit
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
block-commit to accept a node-name without lifting the restriction that
we're operating at a root node.

As libvirt makes use of the DeviceNotFound error class, we must add
explicit code to retain this behaviour because qmp_get_root_bs() only
returns GenericErrors.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
b6c1bae5df block: Accept node-name for block-stream
In order to remove the necessity to use BlockBackend names in the
external API, we want to allow node-names everywhere. This converts
block-stream to accept a node-name without lifting the restriction that
we're operating at a root node.

In case of an invalid device name, the command returns the GenericError
error class now instead of DeviceNotFound, because this is what
qmp_get_root_bs() returns.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
9ef6e505f0 scsi: scsi-cd without drive property for empty drive
This allows the creation of an empty scsi-cd device without manually
creating a BlockBackend.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:47 +02:00
Kevin Wolf
67c75f3dff ide: ide-cd without drive property for empty drive
This allows the creation of an empty ide-cd device without manually
creating a BlockBackend.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
2016-09-05 19:06:47 +02:00
David Hildenbrand
fc4b84b1c6 qmp: details about CPU definitions in query-cpu-definitions
It might be of interest for tooling whether a CPU definition can be safely
used when migrating, or if e.g. CPU features might get lost during
migration when migrationg from/to a different QEMU version or host, even if
the same compatibility machine is used.

Also, we want to know if a CPU definition is static and will never change.
Beause these definitions can then be used independantly of a compatibility
machine and will always have the same feature set, they can e.g. be used
to indicate the "host" model in libvirt later on.

Let's add two return values to query-cpu-definitions, stating for each
returned CPU definition, if it is migration-safe and if it is static.

While "migration-safe" is optional, "static" will be set to "false"
automatically by all implementing architectures. If a model really was
static all the time and will be in the future, this can simply be changed
later.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Message-Id: <20160905085244.99980-2-dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:42:36 +02:00
David Hildenbrand
b60fae32ff s390x/kvm: 2 byte software breakpoint support
Diag 501 (4 bytes) was used until now for software breakpoints on s390.
As instructions on s390 might be 2 bytes long, temporarily overwriting them
with 4 bytes is evil and can result in very strange guest behaviour.

We make use of invalid instruction 0x0000 as new sw breakpoint instruction.
We have to enable interception of that instruction in KVM using a
capability.

If no software breakpoint has been inserted at the reported position, an
operation exception has to be injected into the guest. Otherwise a
breakpoint has been hit and the pc has to be rewound.

If KVM doesn't yet support interception of instruction 0x0000 the
existing mechanism exploiting diag 501 is used. To keep overhead low,
interception of instruction 0x0000 will only be enabled if sw breakpoints
are really used.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
dbdfea9226 linux-headers: update
Update headers against 4.8-rc2.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
882b3b9769 s390x/css: handle cssid 255 correctly
The cssid 255 is reserved but still valid from an architectural
point of view. However, feeding a bogus schid of 0xffffffff into
the virtio hypercall will lead to a crash:

Stack trace of thread 138363:
        #0  0x00000000100d168c css_find_subch (qemu-system-s390x)
        #1  0x00000000100d3290 virtio_ccw_hcall_notify
        #2  0x00000000100cbf60 s390_virtio_hypercall
        #3  0x000000001010ff7a handle_hypercall
        #4  0x0000000010079ed4 kvm_cpu_exec (qemu-system-s390x)
        #5  0x00000000100609b4 qemu_kvm_cpu_thread_fn
        #6  0x000003ff8b887bb4 start_thread (libpthread.so.0)
        #7  0x000003ff8b78df0a thread_start (libc.so.6)

This is because the css array was only allocated for 0..254
instead of 0..255.

Let's fix this by bumping MAX_CSSID to 255 and fencing off the
reserved cssid of 255 during css image allocation.

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
5759db1936 s390x/ioinst: advertise fcs facility
As we provide format 1 chsc scpd data (and don't support any ficon
channels), we de facto already have the ficon-cascaded-switch
facility.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
f2cab7f148 s390x: wrap flic savevm calls into vmstate
Just a simple conversion to get rid of register_savevm.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Christian Borntraeger
989fd865f5 s390/sclp: cache the sclp device
With the current code a simple sclp command takes about 13000 ns
The biggest part seems to be the resolver of the object model. By
caching the sclp device the time for an sclp command goes down to
2500ns. Talking about real life scenarios, this change doubles
the speed of the sclp console when sending single bytes outputs
to /dev/console.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Yi Min Zhao
0c2a16a4dc s390x/pci: assert zpci always existing
If one pci device is plugged successfully, there must be a zpci device
existing. This means that during hot-unplugging a pci device, its
corresponding zpci device must be found. Therefore we use an assert to
replace current code.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Yi Min Zhao
0d36d79192 s390x/pci: return directly if create zpci failed
In the case that zpci is automatically created, we did not return
immediately on failure, which would lead to NULL pointer dereferencing.
Let's fix it.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Cornelia Huck
61823988df s390x: add compat machine for 2.8
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-09-05 15:15:16 +02:00
Peter Maydell
e87d397e5e Open 2.8 development tree
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-05 11:38:54 +01:00
Peter Maydell
1dc33ed90b Update version for v2.7.0 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-09-02 13:44:11 +01:00
Juergen Gross
4ada797b05 xen: use native disk xenbus protocol if possible
The qdisk implementation is using the native xenbus protocol only in
case of no protocol specified at all. As using the explicit 32- or
64-bit protocol is slower than the native one due to copying requests
not by memcpy but element for element, this is not optimal.

Correct this by using the native protocol in case word sizes of
frontend and backend match.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
2016-08-30 15:01:01 -07:00
Peter Maydell
12d2c4184c Update version for v2.7.0-rc5 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 20:39:45 +01:00
Greg Kurz
56f101ecce 9pfs: handle walk of ".." in the root directory
The 9P spec at http://man.cat-v.org/plan_9/5/intro says:

All directories must support walks to the directory .. (dot-dot) meaning
parent directory, although by convention directories contain no explicit
entry for .. or . (dot).  The parent of the root directory of a server's
tree is itself.

This means that a client cannot walk further than the root directory
exported by the server. In other words, if the client wants to walk
"/.." or "/foo/../..", the server should answer like the request was
to walk "/".

This patch just does that:
- we cache the QID of the root directory at attach time
- during the walk we compare the QID of each path component with the root
  QID to detect if we're in a "/.." situation
- if so, we skip the current component and go to the next one

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 19:23:00 +01:00
Greg Kurz
805b5d98c6 9pfs: forbid . and .. in file names
According to the 9P spec http://man.cat-v.org/plan_9/5/open about the
create request:

The names . and .. are special; it is illegal to create files with these
names.

This patch causes the create and lcreate requests to fail with EINVAL if
the file name is either "." or "..".

Even if it isn't explicitly written in the spec, this patch extends the
checking to all requests that may cause a directory entry to be created:

    - mknod
    - rename
    - renameat
    - mkdir
    - link
    - symlink

The unlinkat request also gets patched for consistency (even if
rmdir("foo/..") is expected to fail according to POSIX.1-2001).

The various error values come from the linux manual pages.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 19:21:56 +01:00
Greg Kurz
fff39a7ad0 9pfs: forbid illegal path names
Empty path components don't make sense for most commands and may cause
undefined behavior, depending on the backend.

Also, the walk request described in the 9P spec [1] clearly shows that
the client is supposed to send individual path components: the official
linux client never sends portions of path containing the / character for
example.

Moreover, the 9P spec [2] also states that a system can decide to restrict
the set of supported characters used in path components, with an explicit
mention "to remove slashes from name components".

This patch introduces a new name_is_illegal() helper that checks the
names sent by the client are not empty and don't contain unwanted chars.
Since 9pfs is only supported on linux hosts, only the / character is
checked at the moment. When support for other hosts (AKA. win32) is added,
other chars may need to be blacklisted as well.

If a client sends an illegal path component, the request will fail and
ENOENT is returned to the client.

[1] http://man.cat-v.org/plan_9/5/walk
[2] http://man.cat-v.org/plan_9/5/intro

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 19:21:39 +01:00
Peter Maydell
2b294f6b65 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* pc-bios/optionrom/Makefile fix for -O0
* revert socket_connect change

# gpg: Signature made Tue 30 Aug 2016 15:36:59 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  optionrom: cope with multiple -O options
  Revert "Change net/socket.c to use socket_*() functions"

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 17:11:14 +01:00
Paolo Bonzini
336d5881a9 optionrom: cope with multiple -O options
Reproducer:

    CFLAGS="-g3 -O0" ./configure --target-list=aarch64-softmmu,arm-softmmu --enable-vhost-net --enable-virtfs

Here CFLAGS ends up with "-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 ... -g3 -O0"
and pc-bios/optionrom/Makefile forgets to add the -O2 it needs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-30 16:28:46 +02:00
Paolo Bonzini
616018352c Revert "Change net/socket.c to use socket_*() functions"
Since commit 7e8449594c, the socket connect code is blocking, because
calling socket_connect() without callback is blocking.  This reverts the
commit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-30 16:28:46 +02:00
Christian Borntraeger
135a972b45 translate: early exit in tb_flush if there is no tcg
tb_flush does all kind of things, which are very tcg specific. As it
is called from some places even for KVM (e.g. gdb server) it is better
to detect these cases and do an early exit.
This also fixes a crash in the gdb server that was triggered by
commit 909eaac9bb ("tb hash: track translated blocks with qht").

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Reported-by: Richard Henderson <rth@twiddle.net>
Reported-by: Brent Baccala <cosine@freesoft.org>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-id: 1472148686-39841-1-git-send-email-borntraeger@de.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 13:30:55 +01:00
Daniel P. Berrange
b69a553b4a ui: fix refresh of VNC server surface
In previous commit

  commit c7628bff41
  Author: Gerd Hoffmann <kraxel@redhat.com>
  Date:   Fri Oct 30 12:10:09 2015 +0100

    vnc: only alloc server surface with clients connected

the VNC server was changed so that the 'vd->server' pixman
image was only allocated when a client is connected.

Since then if a client disconnects and then reconnects to
the VNC server all they will see is a black screen until
they do something that triggers a refresh. On a graphical
desktop this is not often noticed since there's many things
going on which cause a refresh. On a plain text console it
is really obvious since nothing refreshes frequently.

The problem is that the VNC server didn't update the guest
dirty bitmap, so still believes its server image is in sync
with the guest contents.

To fix this we must explicitly mark the entire guest desktop
as dirty after re-creating the server surface. Move this
logic into vnc_update_server_surface() so it is guaranteed
to be call in all code paths that re-create the surface
instead of only in vnc_dpy_switch()

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Peter Lieven <pl@kamp.de>
Tested-by: Peter Lieven <pl@kamp.de>
Message-id: 1471365032-18096-1-git-send-email-berrange@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-30 11:20:24 +01:00
Peter Maydell
e00da552a0 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio: fixes

some bugfixes for virtio
balloon is still broken wrt migration

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 23 Aug 2016 17:33:11 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio: decrement vq->inuse in virtqueue_discard()
  virtio: recalculate vq->inuse after migration

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-24 17:21:03 +01:00
Ed Maste
8c1c230a6e Fix bsd-user build after d915b7bb
Must include "qemu-version.h" for the QEMU_PKGVERSION definition.

Signed-off-by: Ed Maste <emaste@freebsd.org>
Message-id: 1471877833-52343-1-git-send-email-emaste@freebsd.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-24 16:42:40 +01:00
Stefan Hajnoczi
58a83c6149 virtio: decrement vq->inuse in virtqueue_discard()
virtqueue_discard() moves vq->last_avail_idx back so the element can be
popped again.  It's necessary to decrement vq->inuse to avoid "leaking"
the element count.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-23 19:20:24 +03:00
Stefan Hajnoczi
bccdef6b1a virtio: recalculate vq->inuse after migration
The vq->inuse field is not migrated.  Many devices don't hold
VirtQueueElements across migration so it doesn't matter that vq->inuse
starts at 0 on the destination QEMU.

At least virtio-serial, virtio-blk, and virtio-balloon migrate while
holding VirtQueueElements.  For these devices we need to recalculate
vq->inuse upon load so the value is correct.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-23 19:20:10 +03:00
Peter Maydell
d75aa4372f Update version for v2.7.0-rc4 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-22 15:29:41 +01:00
Peter Maydell
62680fad7f Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Mon 22 Aug 2016 09:06:32 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  e1000e: remove internal interrupt flag
  slirp: fix segv when init failed

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-22 10:02:28 +01:00
Cao jin
e0af5a0e8b e1000e: remove internal interrupt flag
Commit 66bf7d58 removed internal msi state flag E1000E_USE_MSI, E1000E_USE_MSIX
is not necessary too, remove it now. And interrupt flag field intr_state also
can be removed now.

CC: Dmitry Fleytman <dmitry@daynix.com>
CC: Jason Wang <jasowang@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-22 16:06:08 +08:00
Marc-André Lureau
67f3280c06 slirp: fix segv when init failed
Since commit f6c2e66ae8, slirp uses an exit notifier to call
slirp_smb_cleanup. However, if init() failed, the notifier isn't added,
and removing it will fail:

==18447== Invalid write of size 8
==18447==    at 0x7EF2B5: notifier_remove (notify.c:32)
==18447==    by 0x48E80C: qemu_remove_exit_notifier (vl.c:2661)
==18447==    by 0x6A2187: net_slirp_cleanup (slirp.c:134)
==18447==    by 0x69419D: qemu_cleanup_net_client (net.c:338)
==18447==    by 0x69445B: qemu_del_net_client (net.c:401)
==18447==    by 0x6A2B81: net_slirp_init (slirp.c:366)
==18447==    by 0x6A4241: net_init_slirp (slirp.c:865)
==18447==    by 0x695C6D: net_client_init1 (net.c:1051)
==18447==    by 0x695F6E: net_client_init (net.c:1108)
==18447==    by 0x696DBA: net_init_netdev (net.c:1498)
==18447==    by 0x7F1F99: qemu_opts_foreach (qemu-option.c:1116)
==18447==    by 0x696E60: net_init_clients (net.c:1516)
==18447==  Address 0x0 is not stack'd, malloc'd or (recently) free'd

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-22 15:20:32 +08:00
Sascha Silbe
5f9f818ea8 test-logging: don't hard-code paths in /tmp
Since f6880b7f [qemu-log: support simple pid substitution for logs],
test-logging creates files with hard-coded names in /tmp. In the best
case, this prevents multiple developers from running "make check" on
the same machine. In the worst case, it allows for symlink attacks,
enabling an attacker to overwrite files that are writable to the
developer running "make check".

Instead of hard-coding the paths, create a temporary directory using
g_dir_make_tmp() and clean it up afterwards.

Fixes: f6880b7f ("qemu-log: support simple pid substitution for logs")
Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-id: 1471545963-11720-3-git-send-email-silbe@linux.vnet.ibm.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 12:44:11 +01:00
Sascha Silbe
5045570009 glib: add compatibility implementation for g_dir_make_tmp()
We're going to make use of g_dir_make_tmp() in test-logging. Provide a
compatibility implementation of it for glib < 2.30.

May behave differently in some edge cases (e.g. pattern only at the
end of the template, the file name is not part of the error message),
but good enough in practice.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-id: 1471545963-11720-2-git-send-email-silbe@linux.vnet.ibm.com
[PMM: removed variable "template" which caused compilation failures
 when C++ files include glib-compat.h]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 12:42:40 +01:00
Michal Privoznik
60c6b790fc syscall.c: Redefine IFLA_* enums
In 9c37146782 I've tried to fix a broken build with older
linux-headers. However, I didn't do it properly. The solution
implemented here is to grab the enums that caused the problem
initially, and rename their values so that they are "QEMU_"
prefixed. In order to guarantee matching values with actual
enums from linux-headers, the enums are seeded with starting
values from the original enums.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-id: 75c14d6e8a97c4ff3931d69c13eab7376968d8b4.1471593869.git.mprivozn@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 09:47:51 +01:00
Michal Privoznik
aee5f8f98e Revert "syscall.c: Fix build with older linux-headers"
The fix I've made there was wrong. I mean, basically what I did
there was equivalent to:

  #if 0
  some code;
  #endif

This reverts commit 9c37146782.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-id: 40d61349e445c1ad5fef795da704bf7ed6e19c86.1471593869.git.mprivozn@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-19 09:47:51 +01:00
Peter Maydell
02b1ad881c Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu 18 Aug 2016 14:39:31 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  block: fix possible reorder of flush operations
  block: fix deadlock in bdrv_co_flush

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-18 14:42:51 +01:00
Denis V. Lunev
156af3ac98 block: fix possible reorder of flush operations
This patch reduce CPU usage of flush operations a bit. When we have one
flush completed we should kick only next operation. We should not start
all pending operations in the hope that they will go back to wait on
wait_queue.

Also there is a technical possibility that requests will get reordered
with the previous approach. After wakeup all requests are removed from
the wait queue. They become active and they are processed one-by-one
adding to the wait queue in the same order. Though new flush can arrive
while all requests are not put into the queue.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Message-id: 1471457214-3994-3-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-18 14:36:49 +01:00
Evgeny Yakovlev
ce83ee57f6 block: fix deadlock in bdrv_co_flush
The following commit
    commit 3ff2f67a7c
    Author: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
    Date:   Mon Jul 18 22:39:52 2016 +0300
    block: ignore flush requests when storage is clean
has introduced a regression.

There is a problem that it is still possible for 2 requests to execute
in non sequential fashion and sometimes this results in a deadlock
when bdrv_drain_one/all are called for BDS with such stalled requests.

1. Current flushed_gen and flush_started_gen is 1.
2. Request 1 enters bdrv_co_flush to with write_gen 1 (i.e. the same
   as flushed_gen). It gets past flushed_gen != flush_started_gen and
   sets flush_started_gen to 1 (again, the same it was before).
3. Request 1 yields somewhere before exiting bdrv_co_flush
4. Request 2 enters bdrv_co_flush with write_gen 2. It gets past
   flushed_gen != flush_started_gen and sets flush_started_gen to 2.
5. Request 2 runs to completion and sets flushed_gen to 2
6. Request 1 is resumed, runs to completion and sets flushed_gen to 1.
   However flush_started_gen is now 2.

From here on out flushed_gen is always != to flush_started_gen and all
further requests will wait on flush_queue. This change replaces
flush_started_gen with an explicitly tracked active flush request.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1471457214-3994-2-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-18 14:36:49 +01:00
Peter Maydell
5844365fe8 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Thu 18 Aug 2016 06:36:16 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  net/net: properly handle multiple packets in net_fill_rstate()
  net: vmxnet: use g_new for pkt initialisation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-18 10:56:41 +01:00
Peter Maydell
4b887ae658 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
Fix 'make docker-test-mingw@fedora'

Peter,

This is the single patch that stalls patchew's mingw testing. Since it
is small and trivial, let's have it in 2.7.

Fam

# gpg: Signature made Wed 17 Aug 2016 13:13:53 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  curl: Cast fd to int for DPRINTF

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-18 10:27:57 +01:00
Zhang Chen
e9e0a5854b net/net: properly handle multiple packets in net_fill_rstate()
When network is busy, we will receive multiple packets at one time. In
that situation, we should keep trying to do the receiving instead of
finalizing only the first packet.

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-18 12:20:57 +08:00
Li Qiang
47882fa497 net: vmxnet: use g_new for pkt initialisation
When network transport abstraction layer initialises pkt, the maximum
fragmentation count is not checked. This could lead to an integer
overflow causing a NULL pointer dereference. Replace g_malloc() with
g_new() to catch the multiplication overflow.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-18 12:05:18 +08:00
Fam Zheng
92b6a16087 curl: Cast fd to int for DPRINTF
Currently "make docker-test-mingw@fedora" has a warning like:

    /tmp/qemu-test/src/block/curl.c: In function 'curl_sock_cb':
    /tmp/qemu-test/src/block/curl.c:172:6: warning: format '%d' expects
    argument of type 'int', but argument 4 has type 'curl_socket_t {aka long
    long unsigned int}'
         DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
          ^
    cc1: all warnings being treated as errors

Cast to int to suppress it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470027888-24381-1-git-send-email-famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2016-08-17 19:57:54 +08:00
Peter Maydell
5f0e775348 Update version for v2.7.0-rc3 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 17:34:30 +01:00
Peter Maydell
9fea273c85 linux-user: Fix llseek with high bit of offset_low set
The llseek syscall takes two 32-bit arguments, offset_high
and offset_low, which must be combined to form a single
64-bit offset. Unfortunately we were combining them with
   (uint64_t)arg2 << 32) | arg3
and arg3 is a signed type; this meant that when promoting
arg3 to a 64-bit type it would be sign-extended. The effect
was that if the offset happened to have bit 31 set then
this bit would get sign-extended into all of bits 63..32.
Explicitly cast arg3 to abi_ulong to avoid the erroneous
sign extension.

Reported-by: Chanho Park <parkch98@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Chanho Park <parkch98@gmail.com>
Message-id: 1470938379-1133-1-git-send-email-peter.maydell@linaro.org
2016-08-16 16:42:03 +01:00
Michal Privoznik
9c37146782 syscall.c: Fix build with older linux-headers
In c5dff280 we tried to make us understand netlink messages more.
So we've added a code that does some translation. However, the
code assumed linux-headers to be at least version 4.4 of it
because most of the symbols there (if not all of them) were added
in just that release. This, however, breaks build on systems with
older versions of the package.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Message-id: 23806aac6db3baf7e2cdab4c62d6e3468ce6b4dc.1471340849.git.mprivozn@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 16:14:48 +01:00
Marc-André Lureau
1451a7a673 qmp-commands.hx: remove outdated note
input-send-event is now stable since
6575ccddf4.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20160811112041.18616-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 15:19:52 +01:00
Peter Maydell
725092ede5 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
target-i386: kernel_irqchip=off fix for KVM

# gpg: Signature made Tue 16 Aug 2016 12:55:42 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  target-i386: kvm: Report kvm_pv_unhalt as unsupported w/o kernel_irqchip

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 13:03:16 +01:00
Eduardo Habkost
648774779a target-i386: kvm: Report kvm_pv_unhalt as unsupported w/o kernel_irqchip
The kvm_pv_unhalt feature doesn't work if kernel_irqchip is
disabled, so we need to report it as unsupported.

Tested-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-08-16 08:49:53 -03:00
Thomas Huth
1f8b56e7ce slirp: Rename "struct arphdr" to "struct slirp_arphdr"
struct arphdr is already used by the system headers on OpenBSD
and thus QEMU does not compile here anymore. Fix it by renaming
our struct to slirp_arphdr instead.

Reported-by: Brad Smith
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1471249494-17392-1-git-send-email-thuth@redhat.com
Buglink: https://bugs.launchpad.net/qemu/+bug/1613133
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 12:17:36 +01:00
Marc-André Lureau
1dc8a6695c char: fix waiting for TLS and telnet connection
Since commit d7a04fd7d5, tcp_chr_wait_connected() was introduced,
so vhost-user could wait until a backend started successfully. In
vhost-user case, the chr socket must be plain unix, and the chr+vhost
setup happens synchronously during qemu startup.

However, with TLS and telnet socket, initial socket setup happens
asynchronously, and s->connected is not set after the socket is
accepted. In order for tcp_chr_wait_connected() to not keep accepting
new connections and proceed with the last accepted socket, it can
check for s->ioc instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20160816083332.15088-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 11:50:55 +01:00
Daniel P. Berrange
8afc224ffe virtio-gpu: fix missing log.h include file
The virtio-gpu.h file defines a macro VIRTIO_GPU_FILL_CMD
which includes a call to qemu_log_mask, but does not
include qemu/log.h. In a default configure, it is lucky
and gets qemu/log.h indirectly due to the 'log' trace
backend being enabled. If that trace backend is disabled
though, eg

 ./configure --enable-trace-backends=nop

Then the build will fail:

In file included from /home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:19:0:
/home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c: In function ‘virgl_cmd_create_resource_2d’:
/home/berrange/src/virt/qemu/include/hw/virtio/virtio-gpu.h:138:13: error: implicit declaration of function ‘qemu_log_mask’ [-Werror=implicit-function-declaration]
             qemu_log_mask(LOG_GUEST_ERROR,                              \
             ^
/home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:34:5: note: in expansion of macro ‘VIRTIO_GPU_FILL_CMD’
     VIRTIO_GPU_FILL_CMD(c2d);
     ^~~~~~~~~~~~~~~~~~~
/home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:34:5: error: nested extern declaration of ‘qemu_log_mask’ [-Werror=nested-externs]
In file included from /home/berrange/src/virt/qemu/hw/display/virtio-gpu-3d.c:19:0:
/home/berrange/src/virt/qemu/include/hw/virtio/virtio-gpu.h:138:27: error: ‘LOG_GUEST_ERROR’ undeclared (first use in this function)
             qemu_log_mask(LOG_GUEST_ERROR,                              \

[snip many more errors]

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470648700-3474-1-git-send-email-berrange@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 11:21:31 +01:00
Peter Maydell
2d1c8d5456 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160816' into staging
Build fix for the ccw bios (bios itself not rebuilt).

# gpg: Signature made Tue 16 Aug 2016 08:00:16 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160816:
  pc-bios/s390-ccw.img: Fix build

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 10:45:00 +01:00
Peter Maydell
66940d7491 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches for 2.7.0-rc3

# gpg: Signature made Mon 15 Aug 2016 14:55:46 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  iotests: Test case for wrong runtime option types
  block/nbd: Store runtime option values
  block/blkdebug: Store config filename
  block/nbd: Use QemuOpts for runtime options
  block/ssh: Use QemuOpts for runtime options

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-16 09:32:40 +01:00
Christian Borntraeger
c86c03cfd2 pc-bios/s390-ccw.img: Fix build
Since
commit a9c87304b7 ("build-sys: fix building with make CFLAGS=.. argument")

pc-bios/s390-ccw.img build might fail with

--- snip ---
main.o: In function `virtio_setup':
qemu/pc-bios/s390-ccw/main.c:117: undefined reference to `__stack_chk_fail'
--- snip ---

Changing the CFLAGS to QEMU_CFLAGS does the trick. We also need to
add -fno-strict-aliasing as this was filtered out.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1471258997-5811-1-git-send-email-borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-08-16 08:52:02 +02:00
Peter Maydell
f3b9e787ae Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160815' into staging
ppc patch queue for 2016-08-15

Just a single patch here, I hope this is the last ppc / spapr fix to
squeeze into qemu-2.7.

# gpg: Signature made Mon 15 Aug 2016 07:46:36 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160815:
  ppc: parse cpu features once

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 21:48:03 +01:00
Peter Maydell
e5bfef86fe Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160812-tag-2' into staging
Xen 2016/08/12, fixed commit message

# gpg: Signature made Sat 13 Aug 2016 00:39:09 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20160812-tag-2:
  xen: handle inbound migration of VMs without ioreq server pages
  Xen: fix converity warning of xen_pt_config_init()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 19:04:51 +01:00
Peter Maydell
aba5d97664 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Fri 12 Aug 2016 11:48:03 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace-events: fix first line comment in trace-events

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 18:27:51 +01:00
Peter Maydell
e57218b6ed pc-bios/optionrom: Fix OpenBSD build with better detection of linker emulation
The various host OSes are irritatingly variable about the name
of the linker emulation we need to pass to ld's -m option to
build the i386 option ROMs. Instead of doing this via a
CONFIG ifdef, check in configure whether any of the emulation
names we know about will work and pass the right answer through
to the makefile. If we can't find one, we fall back to not trying
to build the option ROMs, in the same way we would for a non-x86
host platform.

This is in particular necessary to unbreak the build on OpenBSD,
since it wants a different answer to FreeBSD and we don't have
an existing CONFIG_ variable that distinguishes the two.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Sean Bruno <sbruno@freebsd.org>
Message-id: 1470672688-6754-1-git-send-email-peter.maydell@linaro.org
2016-08-15 17:21:30 +01:00
Pranith Kumar
dfd6076710 softfloat: Fix warn about implicit conversion from int to int8_t
Change the flag type to 'uint8_t' to fix the implicit conversion error.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Message-id: 20160810185502.32015-1-bobby.prani@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 16:15:38 +01:00
Michael S. Tsirkin
94c9cb31c0 Revert "vhost-user: Attempt to fix a race with set_mem_table."
This reverts commit 28ed5ef163.

I still think it's the right thing to do, but
tests have been failing sporadically.

Revert for now, and hope to fix it before the release.

Cc: Prerna Saxena <prerna.saxena@nutanix.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Marc-André Lureau <mlureau@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1471268075-3425-1-git-send-email-mst@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-15 15:12:21 +01:00
Max Reitz
7d3e693646 iotests: Test case for wrong runtime option types
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:29 +02:00
Max Reitz
03504d05f0 block/nbd: Store runtime option values
Store the runtime option values in the BDRVNBDState so they can later be
used in nbd_refresh_filename() without having to directly access the
options QDict which may contain values of non-string types.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:29 +02:00
Max Reitz
036990d72b block/blkdebug: Store config filename
Store the configuration file's filename so it can later be used in
bdrv_refresh_filename() without having to directly access the options
QDict which may contain a value of a non-string type.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:28 +02:00
Max Reitz
7ccc44fd7d block/nbd: Use QemuOpts for runtime options
Using QemuOpts will prevent qemu from crashing if the input options have
not been validated (which is the case when they are specified on the
command line or in a json: filename) and some have the wrong type.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:28 +02:00
Max Reitz
8a6a80896d block/ssh: Use QemuOpts for runtime options
Using QemuOpts will prevent qemu from crashing if the input options have
not been validated (which is the case when they are specified on the
command line or in a json: filename) and some have the wrong type.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-15 15:52:28 +02:00
Greg Kurz
e703d2f71c ppc: parse cpu features once
Considering that features are converted to global properties and
global properties are automatically applied to every new instance
of created CPU (at object_new() time), there is no point in
parsing cpu_model string every time a CPU created. So move
parsing outside CPU creation loop and do it only once.

Parsing also should be done before any CPU is created so that
features would affect the first CPU a well.

This patch does that for all PowerPC machine types.

It is based on previous work from Bharata:

https://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg07564.html

Signed-off-by: Greg Kurz <groug@kaod.org>
[clg: only kept the fix for the spapr platform. support for other
      platform will be added in 2.8 ]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-13 17:32:58 +10:00
Paul Durrant
b7665c6027 xen: handle inbound migration of VMs without ioreq server pages
VMs created on older versions on Xen will not have been provisioned with
pages to support creation of non-default ioreq servers. In this case
the ioreq server API is not supported and QEMU's only option is to fall
back to using the default ioreq server pages as it did prior to
commit 3996e85c ("Xen: Use the ioreq-server API when available").

This patch therefore changes the code in xen_common.h to stop considering
a failure of xc_hvm_create_ioreq_server() as a hard failure but simply
as an indication that the guest is too old to support the ioreq server
API. Instead a boolean is set to cause reversion to old behaviour such
that the default ioreq server is then used.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
2016-08-12 16:38:30 -07:00
Cao jin
c4f68f0b52 Xen: fix converity warning of xen_pt_config_init()
emu_regs is a pointer, ARRAY_SIZE doesn't return what we expect.
Since the remaining message is enough for debugging, so just remove it.
Also tweaked the message a little.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
2016-08-12 16:38:18 -07:00
Pranith Kumar
6bbbb0ac13 target-arm: Fix warn about implicit conversion
Clang warns about an implicit conversion as follows:

/mnt/devops/code/qemu/target-arm/neon_helper.c:1075:1: warning: implicit conversion from 'int' to 'int8_t' (aka 'signed char') changes value from 128 to -128 [-Wconstant-conversion]
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/mnt/devops/code/qemu/target-arm/neon_helper.c:116:83: note: expanded from macro 'NEON_VOP_ENV'
uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \
                                                                                  ^
/mnt/devops/code/qemu/target-arm/neon_helper.c:106:5: note: expanded from macro '\
NEON_VOP_BODY'
    NEON_DO##n; \
    ^~~~~~~~~~
<scratch space>:21:1: note: expanded from here
NEON_DO4
^~~~~~~~
/mnt/devops/code/qemu/target-arm/neon_helper.c:93:5: note: expanded from macro 'NEON_DO4'
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \
    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/mnt/devops/code/qemu/target-arm/neon_helper.c:1054:23: note: expanded from macro 'NEON_FN'
            dest = (1 << (sizeof(src1) * 8 - 1)); \
                 ~  ~~^~~~~~~~~~~~~~~~~~~~~~~~~

Fix it by casting to appropriate type.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-12 11:12:24 +01:00
Laurent Vivier
e723b87103 trace-events: fix first line comment in trace-events
Documentation is docs/tracing.txt instead of docs/trace-events.txt.

find . -name trace-events -exec \
     sed -i "s?See docs/trace-events.txt for syntax documentation.?See docs/tracing.txt for syntax documentation.?" \
     {} \;

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1470669081-17860-1-git-send-email-lvivier@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-12 10:36:01 +01:00
Peter Maydell
28b874429b Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.7-7' into staging
Migration:
 - couple of bug fixes
 - couple of typo fixes

# gpg: Signature made Thu 11 Aug 2016 12:36:00 BST
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit-migration/tags/migration-for-2.7-7:
  migration/socket: fix typo in file header
  migration: fix live migration failure with compression
  migration: mmap error check fix
  migration/ram: fix typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-11 17:53:35 +01:00
Peter Maydell
d915b7bb4c Update ancient copyright string in -version output
Currently the -version command line argument prints a string ending
with "Copyright (c) 2003-2008 Fabrice Bellard".  This is now some
eight years out of date; abstract it out of the several places that
print the string and update it to:

Copyright (c) 2003-2016 Fabrice Bellard and the QEMU Project developers

to reflect the work by all the QEMU Project contributors over the
last decade.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1470309276-5012-1-git-send-email-peter.maydell@linaro.org
2016-08-11 16:24:53 +01:00
Peter Maydell
bea048dcb9 Merge remote-tracking branch 'remotes/amit/tags/vser-for-2.7-1' into staging
virtio-console: fix receiving data from guest

# gpg: Signature made Thu 11 Aug 2016 12:17:55 BST
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit/tags/vser-for-2.7-1:
  virtio-console: set frontend open permanently for console devs

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-11 13:26:35 +01:00
Cao jin
474c624ddf migration/socket: fix typo in file header
Code of inet socket & unix socket is merged together.
Also add some newlines, make code block well separated.

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Juan Quintela <quintela@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469696074-12744-4-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 17:03:51 +05:30
Liang Li
787d134fb1 migration: fix live migration failure with compression
Because of commit 11808bb0c4, which remove some condition checks
of 'f->ops->writev_buffer', 'qemu_put_qemu_file' should be enhanced
to clear the 'f_src->iovcnt', or 'f_src->iovcnt' may exceed the
MAX_IOV_SIZE which will break live migration. This should be fixed.

Signed-off-by: Liang Li <liang.z.li@intel.com>
Reported-by: Jinshi Zhang <jinshi.c.zhang@intel.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1470702146-24399-1-git-send-email-liang.z.li@intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:59:53 +05:30
Evgeny Yakovlev
0e8b3cdfbc migration: mmap error check fix
mmap man page:
"On success, mmap() returns a pointer to the mapped area. On error, the
value MAP_FAILED (that is, (void *) -1) is returned, and errno  is  set
to indicate the cause of the error."

The check in postcopy_get_tmp_page is definitely wrong and should be
fixed.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Amit Shah <amit.shah@redhat.com>
Message-Id: <1469785705-16670-1-git-send-email-den@openvz.org>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:59:38 +05:30
Cao jin
e110aa919a migration/ram: fix typo
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469776231-23820-1-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:59:33 +05:30
Daniel P. Berrange
bce6261eb2 virtio-console: set frontend open permanently for console devs
The virtio-console.c file handles both serial consoles
and interactive consoles, since they're backed by the
same device model.

Since serial devices are expected to be reliable and
need to notify the guest when the backend is opened
or closed, the virtio-console.c file wires up support
for chardev events. This affects both serial consoles
and interactive consoles, using a network connection
based chardev backend such as 'socket', but not when
using a PTY based backend or plain 'file' backends.

When the host side is not connected the handle_output()
method in virtio-serial-bus.c will drop any data sent
by the guest, before it even reaches the virtio-console.c
code. This means that if the chardev has a logfile
configured, the data will never get logged.

Consider for example, configuring a x86_64 guest with a
plain UART serial port

  -chardev socket,id=charserial1,host=127.0.0.1,port=9001,server,nowait,logfile=console1.log,logappend=on
  -device isa-serial,chardev=charserial1,id=serial1

vs a s390 guest which has to use the virtio-console port

  -chardev socket,id=charconsole1,host=127.0.0.1,port=9000,server,nowait,logfile=console2.log,logappend=on
  -device virtconsole,chardev=charconsole1,id=console1

The isa-serial one gets data written to the log regardless
of whether a client is connected, while the virtioconsole
one only gets data written to the log when a client is
connected.

There is no need for virtio-serial-bus.c to aggressively
drop the data for console devices, as the chardev code is
prefectly capable of discarding the data itself.

So this patch changes virtconsole devices so that they
are always marked as having the host side open. This
ensures that the guest OS will always send any data it
has (Linux virtio-console hvc driver actually ignores
the host open state and sends data regardless, but we
should not rely on that), and also prevents the
virtio-serial-bus code prematurely discarding data.

The behaviour of virtserialport devices is *not* changed,
only virtconsole, because for the former, it is important
that the guest OSknow exactly when the host side is opened
/ closed so it can do any protocol re-negotiation that may
be required.

Fixes bug: https://bugs.launchpad.net/qemu/+bug/1599214

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1470241360-3574-2-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-08-11 16:38:58 +05:30
Peter Maydell
144a6db0b0 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu 11 Aug 2016 11:35:33 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  linux-aio: Handle io_submit() failure gracefully

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-11 11:48:46 +01:00
Kevin Wolf
44713c9e85 linux-aio: Handle io_submit() failure gracefully
It is generally not expected that io_submit() fails other than with
-EAGAIN, but corner cases like SELinux refusing I/O when permissions are
revoked are still possible. In this case, we shouldn't abort, but just
return an I/O error for the request.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-id: 1470741619-23231-1-git-send-email-kwolf@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-11 09:42:35 +01:00
Peter Maydell
d08306dc42 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio/vhost: fixes

some bugfixes for virtio/vhost

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 10 Aug 2016 16:16:22 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  vhost-user: Attempt to fix a race with set_mem_table.
  vhost-user: Introduce a new protocol feature REPLY_ACK.
  vhost: check for vhost_ops before using.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 17:14:35 +01:00
Peter Maydell
4b3e5c06a1 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* pc-bios/optionrom/Makefile fixes
* warning fixes for __atomic_load and -1 << x in clang
* missed interrupt fix from Gonglei
* checkpatch fix from Radim and myself

# gpg: Signature made Wed 10 Aug 2016 14:54:31 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  checkpatch: default to success if only warnings
  checkpatch: bump most warnings to errors
  CODING_STYLE, checkpatch: update line length rules
  checkpatch: check for CVS keywords on all sources
  checkpatch: tweak the files in which TABs are checked
  timer: set vm_clock disabled default
  checkpatch: ignore automatically imported Linux headers
  clang: Fix warning reg. expansion to 'defined'
  Disable warn about left shifts of negative values
  atomic: strip "const" from variables declared with typeof
  optionrom: fix compilation with mingw docker target
  optionrom: add -fno-stack-protector
  build-sys: fix building with make CFLAGS=.. argument
  linuxboot_dma: avoid guest ABI breakage on gcc vs. clang compilation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 15:59:08 +01:00
Prerna Saxena
28ed5ef163 vhost-user: Attempt to fix a race with set_mem_table.
The set_mem_table command currently does not seek a reply. Hence, there is
no easy way for a remote application to notify to QEMU when it finished
setting up memory, or if there were errors doing so.

As an example:
(1) Qemu sends a SET_MEM_TABLE to the backend (eg, a vhost-user net
application). SET_MEM_TABLE does not require a reply according to the spec.
(2) Qemu commits the memory to the guest.
(3) Guest issues an I/O operation over a new memory region which was configured on (1).
(4) The application has not yet remapped the memory, but it sees the I/O request.
(5) The application cannot satisfy the request because it does not know about those GPAs.

While a guaranteed fix would require a protocol extension (committed separately),
a best-effort workaround for existing applications is to send a GET_FEATURES
message before completing the vhost_user_set_mem_table() call.
Since GET_FEATURES requires a reply, an application that processes vhost-user
messages synchronously would probably have completed the SET_MEM_TABLE before replying.

Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-10 17:47:29 +03:00
Prerna Saxena
ca525ce561 vhost-user: Introduce a new protocol feature REPLY_ACK.
This introduces the VHOST_USER_PROTOCOL_F_REPLY_ACK.

If negotiated, client applications should send a u64 payload in
response to any message that contains the "need_reply" bit set
on the message flags. Setting the payload to "zero" indicates the
command finished successfully. Likewise, setting it to "non-zero"
indicates an error.

Currently implemented only for SET_MEM_TABLE.

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-10 17:47:29 +03:00
Ilya Maximets
ca10203cde vhost: check for vhost_ops before using.
'vhost_set_vring_enable()' tries to call function using pointer to
'vhost_ops' which can be already zeroized in 'vhost_dev_cleanup()'
while vhost disconnection.

Fix that by checking 'vhost_ops' before using. This fixes QEMU crash
on calling 'ethtool -L eth0 combined 2' if vhost disconnected.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-08-10 17:47:29 +03:00
Peter Maydell
d578cca333 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160810' into staging
ppc patch queue for 2016-08-10

Here are some more last minute PAPR and ppc related fixes for
qemu-2.7.  One patch makes compressed memory dumps work with guest
kernels using page sizes up to 64KiB.  This is important since most
current pseries guests use a 64KiB default page size.  The remainder
fix a regression with handling of CPU aliases which causes serious
problem for libvirt.

# gpg: Signature made Wed 10 Aug 2016 06:44:27 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160810:
  ppc/kvm: Register also a generic spapr CPU core family type
  ppc/kvm: Do not mess up the generic CPU family registration
  hw/ppc/spapr: Look up CPU alias names instead of hard-coding the aliases
  ppc: Introduce a function to look up CPU alias strings
  spapr: remove extra type variable
  ppc64: fix compressed dump with pseries kernel

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 15:13:30 +01:00
Paolo Bonzini
141de88654 checkpatch: default to success if only warnings
CHK-level checks have been removed from checkpatch or bumped to
errors, so there is no effect anymore for --strict/--subjective.
Furthermore, even most WARNs have been bumped to errors, with
WARN only reserved to things that patchew probably ought not
to complain about (and that maintainers probably will notice
anyway during review if they are extreme).

Default to exiting with success even if there are WARN-level
failures, and cause --strict to fail for warnings.  Maintainers
that want to have a strict 80-character limit for their subsystem
can add it to a commit hook for example.

The --subjective synonym is removed.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 12:44:51 +02:00
Paolo Bonzini
c2df878325 checkpatch: bump most warnings to errors
This only leaves a warning-level message for the extra-long lines
soft limit.  Everything else is bumped up.

In the future warnings can be added for checks that can have false
positives.

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 12:44:48 +02:00
Paolo Bonzini
8fbe3d1fcf CODING_STYLE, checkpatch: update line length rules
Line lengths above 80 characters do exist.  They are rare, but
they happen from time to time.  An ignored rule is worse than an
exception to the rule, so do the latter.

Some on the list expressed their preference for a soft limit that
is slightly lower than 80 characters, to account for extra characters
in unified diffs (including three-way diffs) and for email quoting.
However, there was no consensus on this so keep the 80-character
soft limit and add a hard limit at 90.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 12:22:33 +02:00
Paolo Bonzini
93eb8e31f3 checkpatch: check for CVS keywords on all sources
These should apply to all files, not just C/C++.  Tweak the regular
expression to check for whole words, to avoid false positives on Perl
variables starting with "Id".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 11:10:03 +02:00
Paolo Bonzini
906fb135e4 checkpatch: tweak the files in which TABs are checked
Include Python and shell scripts, and make an exception for Perl
scripts we imported from Linux or elsewhere.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-10 11:09:54 +02:00
Thomas Huth
d11b268e17 ppc/kvm: Register also a generic spapr CPU core family type
There is a regression with the "-cpu" parameter introduced by
the spapr CPU hotplug code: We used to allow to specify a
"CPU family" name with the "-cpu" parameter when running on KVM so
that the user does not need to know the gory details of the exact
CPU version of the host CPU. For example, it was possible to
use "-cpu POWER8" on a POWER8E host CPU. This behavior does not
work anymore with the new hot-pluggable spapr-cpu-core types.
Since libvirt already heavily depends on the old behavior, this
is quite a severe regression in the QEMU parameter interface.
Let's fix it by supporting a CPU family type for the spapr-cpu-core
on KVM, too.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1363812
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Thomas Huth
9c83fc2e8e ppc/kvm: Do not mess up the generic CPU family registration
The code for registering the sPAPR CPU host core type has been
added inbetween the generic CPU host core type and the generic
CPU family type. That way the instance_init and the class_init
information got lost when registering the generic CPU family
type. Fix it by moving the generic family registration before
the spapr cpu core registration code.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Thomas Huth
4babfaf05d hw/ppc/spapr: Look up CPU alias names instead of hard-coding the aliases
Hard-coding the CPU alias names in the spapr_cores[] array has
two big disadvantages:

1) We register a real type with the CPU alias name in
   spapr_cpu_core_register_types() - this prevents us from registering
   a CPU family name in kvm_ppc_register_host_cpu_type() with the same
   name (as we do it for the non-hotpluggable CPU types).

2) It's quite cumbersome to maintain the aliases here in sync with the
   ppc_cpu_aliases list from target-ppc/cpu-models.c.

So let's simply add proper alias lookup to the spapr cpu core code,
too (by checking whether the given model can be used directly, and
if not by trying to look up the given model as an alias name instead).

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Thomas Huth
caf6316de9 ppc: Introduce a function to look up CPU alias strings
We will need this function to look up the aliases in the
spapr-cpu-core code, too.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Cédric Le Goater
caebf37859 spapr: remove extra type variable
The sPAPR CPU core typename is already available in the upper
block. Let's use it and move the check upward also.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:12:20 +10:00
Laurent Vivier
760d88d1d0 ppc64: fix compressed dump with pseries kernel
If we don't provide the page size in target-ppc:cpu_get_dump_info(),
the default one (TARGET_PAGE_SIZE, 4KB) is used to create
the compressed dump. It works fine with Macintosh, but not with
pseries as the kernel default page size is 64KB.

Without this patch, if we generate a compressed dump in the QEMU monitor:

    (qemu) dump-guest-memory -z qemu.dump

This dump cannot be read by crash:

    # crash vmlinux qemu.dump
    ...
    WARNING: cannot translate vmemmap kernel virtual addresses:
             commands requiring page structure contents will fail
    ...

Page_size is used to determine the dumpfile's block size. The
block size needs to be at least the page size, but a multiple of page
size works fine too. For PPC64, linux supports either 4KB or 64KB software
page size. So we define the page_size to 64KB.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-10 13:05:05 +10:00
Gonglei
3fdd0ee393 timer: set vm_clock disabled default
(commit 80dcfb8532)
Upon migration, the code use a timer based on vm_clock for 1ns
in the future from post_load to do the event send in case host_connected
differs between migration source and target.

However, it's not guaranteed that the apic is ready to inject irqs into
the guest, and the irq line remained high, resulting in any future interrupts
going unnoticed by the guest as well.

That's because 1) the migration coroutine is not blocked when it get EAGAIN
while reading QEMUFile. 2) The vm_clock is enabled default currently, it doesn't
rely on the calling of vm_start(), that means vm_clock timers can run before
VCPUs are running.

So, let's set the vm_clock disabled default, keep the initial intention of
design for vm_clock timers.

Meanwhile, change the test-aio usecase, using QEMU_CLOCK_REALTIME instead of
QEMU_CLOCK_VIRTUAL as the block code does.

CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: qemu-stable@nongnu.org
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Message-Id: <1470728955-90600-1-git-send-email-arei.gonglei@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Radim Krčmář
93bf13c6df checkpatch: ignore automatically imported Linux headers
Linux uses tabs for indentation and checkpatch always complained about
automatically imported headers.  update-linux-headers.sh could be modified to
expand tabs, but there is no real reason to complain about any ugly code in
Linux headers, so skip all hunk-related checks.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Pranith Kumar
2368635d39 clang: Fix warning reg. expansion to 'defined'
Clang produces the following warning. The warning is detailed here:
https://reviews.llvm.org/D15866. Fix the warning.

/home/pranith/devops/code/qemu/hw/display/qxl.c:507:5: warning: macro expansion producing 'defined' has undefined behavior [-Wexpansion-to-defined]
    ^
/home/pranith/devops/code/qemu/include/ui/qemu-spice.h:46:5: note: expanded from macro 'SPICE_NEEDS_SET_MM_TIME'
  (!defined(SPICE_SERVER_VERSION) || (SPICE_SERVER_VERSION < 0xc06))
    ^
/home/pranith/devops/code/qemu/hw/display/qxl.c:1074:5: warning: macro expansion producing 'defined' has undefined behavior [-Wexpansion-to-defined]
    ^
/home/pranith/devops/code/qemu/include/ui/qemu-spice.h:46:5: note: expanded from macro 'SPICE_NEEDS_SET_MM_TIME'
  (!defined(SPICE_SERVER_VERSION) || (SPICE_SERVER_VERSION < 0xc06))

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Pranith Kumar
435405ac59 Disable warn about left shifts of negative values
It seems like there's no good reason for the compiler to exploit the
undefinedness of left shifts.  GCC explicitly documents that they do not
use at all this possibility and, while they also say this is subject
to change, they have been saying this for 10 years (since the wording
appeared in the GCC 4.0 manual).

Disable these warnings by passing in -Wno-shift-negative-value.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[pranith: forward-port part of patch to 2.7]
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
5927ed846a atomic: strip "const" from variables declared with typeof
With the latest clang, we have the following warning:

    /home/pranith/devops/code/qemu/include/qemu/seqlock.h:62:21: warning: passing 'typeof (*&sl->sequence) *' (aka 'const unsigned int *') to parameter of type 'unsigned int *' discards qualifiers [-Wincompatible-pointer-types-discards-qualifiers]
        return unlikely(atomic_read(&sl->sequence) != start);
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~
    /home/pranith/devops/code/qemu/include/qemu/atomic.h:58:25: note: expanded from macro 'atomic_read'
        __atomic_load(ptr, &_val, __ATOMIC_RELAXED);     \
                           ^~~~~

Stripping const is a bit tricky due to promotions, but it is doable
with either C11 _Generic or GCC extensions.  Use the latter.

Reported-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[pranith: Add conversion for bool type]
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
9d4cd7b4ed optionrom: fix compilation with mingw docker target
Two fixes are needed.  First, mingw does not have -D_FORTIFY_SOURCE,
hence --enable-debug disables optimization.  This is not acceptable
for ROMs, which should override CFLAGS to force inclusion of -O2.

Second, PE stores global constructors and destructors using the
following linker script snippet:

     ___CTOR_LIST__ = .; __CTOR_LIST__ = . ;
			LONG (-1);*(.ctors); *(.ctor); *(SORT(.ctors.*));  LONG (0);
     ___DTOR_LIST__ = .; __DTOR_LIST__ = . ;
			LONG (-1); *(.dtors); *(.dtor); *(SORT(.dtors.*));  LONG (0);

The LONG directives cause the .img files to be 16 bytes too large;
the recently added check to signrom.py catches this.  To fix this,
replace -T and -e options with a linker script.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
b0e8f5cadc optionrom: add -fno-stack-protector
This is required by OpenBSD.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Marc-André Lureau
a9c87304b7 build-sys: fix building with make CFLAGS=.. argument
When calling make with a CFLAGS=.. argument, the -g/-O filter is not
applied, which may result with build failure with ASAN for example. It
could be solved with an 'override' directive on CFLAGS, but that would
actually prevent setting different CFLAGS manually.

Instead, filter the CFLAGS argument from the top-level Makefile (so
you could still call make with a different CFLAGS argument on a
rom/Makefile manually)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20160805082421.21994-2-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Paolo Bonzini
7f2569246c linuxboot_dma: avoid guest ABI breakage on gcc vs. clang compilation
Recent GCC compiles linuxboot_dma.c to 921 bytes, while CentOS 6 needs
1029 and clang needs 1527.  Because the size of the ROM, rounded to the
next 512 bytes, must match, this causes the API to break between a <1K
ROM and one that is bigger.

We want to make the ROM 1.5 KB in size, but it's better to make clang
produce leaner ROMs, because currently it is worryingly close to the limit.
To fix this prevent clang's happy inlining (which -Os cannot prevent).
This only requires adding a noinline attribute.

Second, the patch makes sure that the ROM has enough padding to prevent
ABI breakage on different compilers.  The size is now hardcoded in the file
that is passed to signrom.py, as was the case before commit 6f71b77
("scripts/signrom.py: Allow option ROM checksum script to write the size
header.", 2016-05-23); signrom.py however will still pad the input to
the requested size.  This ensures that the padding goes beyond the
next multiple of 512 if necessary, and also avoids the need for
-fno-toplevel-reorder which clang doesn't support.  signrom.py can then
error out if the requested size is too small for the actual size of the
compiled ROM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-09 22:57:36 +02:00
Peter Maydell
2bb15bddf2 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Tue 09 Aug 2016 16:47:32 BST
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  atapi: fix halted DMA reset

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-09 16:53:32 +01:00
John Snow
7f951b2d77 atapi: fix halted DMA reset
Followup to 87ac25fd, this time for ATAPI DMA.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 1470164128-28158-1-git-send-email-jsnow@redhat.com
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2016-08-09 11:47:23 -04:00
Peter Maydell
ab861f3915 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 09 Aug 2016 08:28:39 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  hw/net: Fix a heap overflow in xlnx.xps-ethernetlite
  net: vmxnet3: check for device_active before write
  net: check fragment length during fragmentation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-09 10:44:27 +01:00
chaojianhu
a0d1cbdacf hw/net: Fix a heap overflow in xlnx.xps-ethernetlite
The .receive callback of xlnx.xps-ethernetlite doesn't check the length
of data before calling memcpy. As a result, the NetClientState object in
heap will be overflowed. All versions of qemu with xlnx.xps-ethernetlite
will be affected.

Reported-by: chaojianhu <chaojianhu@hotmail.com>
Signed-off-by: chaojianhu <chaojianhu@hotmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-09 15:27:18 +08:00
Li Qiang
6c352ca9b4 net: vmxnet3: check for device_active before write
Vmxnet3 device emulator does not check if the device is active,
before using it for write. It leads to a use after free issue,
if the vmxnet3_io_bar0_write routine is called after the device is
deactivated. Add check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-09 15:24:56 +08:00
Prasad J Pandit
ead315e43e net: check fragment length during fragmentation
Network transport abstraction layer supports packet fragmentation.
While fragmenting a packet, it checks for more fragments from
packet length and current fragment length. It is susceptible
to an infinite loop, if the current fragment length is zero.
Add check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
CC: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-08-09 11:45:30 +08:00
Peter Maydell
53279c76cf Update version for v2.7.0-rc2 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 17:26:11 +01:00
Peter Maydell
4977bb09dd Merge remote-tracking branch 'remotes/armbru/tags/pull-monitor-2016-08-08' into staging
Monitor patches for 2016-08-08

# gpg: Signature made Mon 08 Aug 2016 13:24:42 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-monitor-2016-08-08:
  audio: clean up before monitor clean up
  monitor: fix crash when leaving qemu with spice audio

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 15:57:06 +01:00
Peter Maydell
b8dc0fcff1 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
More block layer patches for 2.7.0-rc2

# gpg: Signature made Mon 08 Aug 2016 12:51:30 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  iotests: fix 109
  mirror: finish earlier on error
  tests: Test blockjob IDs
  block/qdev: Let 'drive' property fall back to node name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 15:21:33 +01:00
Peter Maydell
684b6b26af Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160808' into staging
One more s390x fix for a bug in the pci rework.

# gpg: Signature made Mon 08 Aug 2016 11:49:34 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160808:
  s390x/pci: fix null pointer bug

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 14:24:51 +01:00
Peter Maydell
47dc0ec576 hw/sparc/leon3: Don't call get_image_size() on a NULL pointer
get_image_size() doesn't handle being passed a NULL pointer, so
avoid doing that. Spotted by the clang ub sanitizer (which notices
the attempt to pass NULL to open()).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1470391439-28427-1-git-send-email-peter.maydell@linaro.org
2016-08-08 13:58:42 +01:00
Peter Maydell
f5edfcfafb Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2016-08-08' into staging
Error reporting patches for 2016-08-08

# gpg: Signature made Mon 08 Aug 2016 08:14:49 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-error-2016-08-08:
  error: Fix error_printf() calls lacking newlines
  vfio: Use error_report() instead of error_printf() for errors
  checkpatch: Fix newline detection in error_setg() & friends
  error: Strip trailing '\n' from error string arguments (again)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 13:25:35 +01:00
Marc-André Lureau
a384c205ac audio: clean up before monitor clean up
Since aa5cb7f5e, the chardevs are being cleaned up when leaving qemu,
before the atexit() handlers. audio_cleanup() may use the monitor to
notify of changes. For compatibility reasons, let's clean up audio
before the monitor so it keeps emitting monitor events.

The audio_atexit() function is made idempotent (so it can be called
multiple times), and renamed to audio_cleanup(). Since coreaudio
backend is using a 'isAtexit' code path, change it to check
audio_is_cleaning_up() instead, so the path is taken during normal
exit.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20160801112343.29082-3-marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-08-08 14:17:00 +02:00
Marc-André Lureau
2ef45716e1 monitor: fix crash when leaving qemu with spice audio
Since aa5cb7f5e, the chardevs are being cleaned up when leaving
qemu. However, the monitor has still references to them, which may
lead to crashes when running atexit() and trying to send monitor
events:

 #0  0x00007fffdb18f6f5 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
 #1  0x00007fffdb1912fa in __GI_abort () at abort.c:89
 #2  0x0000555555c263e7 in error_exit (err=22, msg=0x555555d47980 <__func__.13537> "qemu_mutex_lock") at util/qemu-thread-posix.c:39
 #3  0x0000555555c26488 in qemu_mutex_lock (mutex=0x5555567a2420) at util/qemu-thread-posix.c:66
 #4  0x00005555558c52db in qemu_chr_fe_write (s=0x5555567a2420, buf=0x55555740dc40 "{\"timestamp\": {\"seconds\": 1470041716, \"microseconds\": 989699}, \"event\": \"SPICE_DISCONNECTED\", \"data\": {\"server\": {\"port\": \"5900\", \"family\": \"ipv4\", \"host\": \"127.0.0.1\"}, \"client\": {\"port\": \"40272\", \"f"..., len=240) at qemu-char.c:280
 #5  0x0000555555787cad in monitor_flush_locked (mon=0x5555567bd9e0) at /home/elmarco/src/qemu/monitor.c:311
 #6  0x0000555555787e46 in monitor_puts (mon=0x5555567bd9e0, str=0x5555567a44ef "") at /home/elmarco/src/qemu/monitor.c:353
 #7  0x00005555557880fe in monitor_json_emitter (mon=0x5555567bd9e0, data=0x5555567c73a0) at /home/elmarco/src/qemu/monitor.c:401
 #8  0x00005555557882d2 in monitor_qapi_event_emit (event=QAPI_EVENT_SPICE_DISCONNECTED, qdict=0x5555567c73a0) at /home/elmarco/src/qemu/monitor.c:472
 #9  0x000055555578838f in monitor_qapi_event_queue (event=QAPI_EVENT_SPICE_DISCONNECTED, qdict=0x5555567c73a0, errp=0x7fffffffca88) at /home/elmarco/src/qemu/monitor.c:497
 #10 0x0000555555c15541 in qapi_event_send_spice_disconnected (server=0x5555571139d0, client=0x5555570d0db0, errp=0x5555566c0428 <error_abort>) at qapi-event.c:1038
 #11 0x0000555555b11bc6 in channel_event (event=3, info=0x5555570d6c00) at ui/spice-core.c:248
 #12 0x00007fffdcc9983a in adapter_channel_event (event=3, info=0x5555570d6c00) at reds.c:120
 #13 0x00007fffdcc99a25 in reds_handle_channel_event (reds=0x5555567a9d60, event=3, info=0x5555570d6c00) at reds.c:324
 #14 0x00007fffdcc7d4c4 in main_dispatcher_self_handle_channel_event (self=0x5555567b28b0, event=3, info=0x5555570d6c00) at main-dispatcher.c:175
 #15 0x00007fffdcc7d5b1 in main_dispatcher_channel_event (self=0x5555567b28b0, event=3, info=0x5555570d6c00) at main-dispatcher.c:194
 #16 0x00007fffdcca7674 in reds_stream_push_channel_event (s=0x5555570d9910, event=3) at reds-stream.c:354
 #17 0x00007fffdcca749b in reds_stream_free (s=0x5555570d9910) at reds-stream.c:323
 #18 0x00007fffdccb5dad in snd_disconnect_channel (channel=0x5555576a89a0) at sound.c:229
 #19 0x00007fffdccb9e57 in snd_detach_common (worker=0x555557739720) at sound.c:1589
 #20 0x00007fffdccb9f0e in snd_detach_playback (sin=0x5555569fe3f8) at sound.c:1602
 #21 0x00007fffdcca3373 in spice_server_remove_interface (sin=0x5555569fe3f8) at reds.c:3387
 #22 0x00005555558ff6e2 in line_out_fini (hw=0x5555569fe370) at audio/spiceaudio.c:152
 #23 0x00005555558f909e in audio_atexit () at audio/audio.c:1754
 #24 0x00007fffdb1941e8 in __run_exit_handlers (status=0, listp=0x7fffdb5175d8 <__exit_funcs>, run_list_atexit=run_list_atexit@entry=true) at exit.c:82
 #25 0x00007fffdb194235 in __GI_exit (status=<optimized out>) at exit.c:104
 #26 0x00007fffdb17b738 in __libc_start_main (main=0x5555558d7874 <main>, argc=67, argv=0x7fffffffcf48, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7fffffffcf38) at ../csu/libc-start.c:323

Add a monitor_cleanup() functions to remove all the monitors before
cleaning up the chardev. Note that we are "losing" some events that
used to be sent during atexit().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20160801112343.29082-2-marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-08-08 14:16:11 +02:00
Peter Maydell
9efaf7f5f5 Merge remote-tracking branch 'remotes/elmarco/tags/leaks-for-2.7-pull-request' into staging
# gpg: Signature made Sun 07 Aug 2016 21:03:14 BST
# gpg:                using RSA key 0xDAE8E10975969CE5
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>"
# gpg:                 aka "Marc-André Lureau <marcandre.lureau@gmail.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276  F62D DAE8 E109 7596 9CE5

* remotes/elmarco/tags/leaks-for-2.7-pull-request:
  ahci: fix sglist leak on retry
  usb: free leaking path
  usb: free USBDevice.strings
  virtio-input: free config list
  qjson: free str
  ahci: free irqs array
  char: free MuxDriver when closing
  char: free the tcp connection data when closing
  numa: do not leak NumaOptions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 12:41:38 +01:00
Vladimir Sementsov-Ogievskiy
a752e4786c iotests: fix 109
109 iotest is broken for raw after 0965a41e99
[mirror: double performance of the bulk stage if the disc is full]

The problem is with finishing block-job with error: before specified
patch mirror was not very async and it created one big request at disk
start, this request finished with error and qemu produced
BLOCK_JOB_COMPLETED with zero progress.

After 0965a41, mirror starts several smaller requests in parallel, when
BLOCK_JOB_COMPLETED emited we have some successful non-zero progress.

This patch solves the issue by filtering out progress from 109 test
output.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-08 13:05:43 +02:00
Vladimir Sementsov-Ogievskiy
dbaa7b57ec mirror: finish earlier on error
Stop to produce new async copy requests from mirror_iteration if
critical error (error action = BLOCK_ERROR_ACTION_REPORT) detected.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-08 13:05:43 +02:00
Alberto Garcia
9ef8112a24 tests: Test blockjob IDs
Since 7f0317cfc8 we have API to specify the ID of block jobs and we
also guarantee that they are well-formed and unique.

This patch adds tests to check some common scenarios.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-08 13:05:43 +02:00
Kevin Wolf
bd7c41765b block/qdev: Let 'drive' property fall back to node name
If a qdev block device is created with an anonymous BlockBackend (i.e.
a node name rather than a BB name was given for the drive property),
qdev used to return an empty string when the property was read. This
patch fixes it to return the node name instead.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 13:05:43 +02:00
Yi Min Zhao
7fc0abf4cb s390x/pci: fix null pointer bug
We should make sure that it's not NULL firstly.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-08-08 12:47:02 +02:00
Peter Maydell
cbda16c010 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160808' into staging
ppc patch queue 2016-08-08

This batch has several last minute bug fixes to be merged for
qemu-2.7.

# gpg: Signature made Mon 08 Aug 2016 03:40:58 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160808:
  spapr: Fix undefined behaviour in spapr_tce_reset()
  macio: set res_count value to 0 after non-block ATAPI DMA transfers
  spapr: Correctly set query_hotpluggable_cpus hook based on machine version

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 11:32:01 +01:00
Peter Maydell
cf5198d580 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160805' into staging
indirect register lowering

# gpg: Signature made Fri 05 Aug 2016 17:34:53 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160805:
  tcg: Lower indirect registers in a separate pass
  tcg: Require liveness analysis
  tcg: Include liveness info in the dumps
  tcg: Compress dead_temps and mem_temps into a single array
  tcg: Fold life data into TCGOp
  tcg: Reorg TCGOp chaining
  tcg: Compress liveness data to 16 bits

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-08 10:39:18 +01:00
Markus Armbruster
7ea7d36e34 error: Fix error_printf() calls lacking newlines
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:01:27 +02:00
Markus Armbruster
fea1c0999a vfio: Use error_report() instead of error_printf() for errors
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:01:18 +02:00
Markus Armbruster
a47eb01098 checkpatch: Fix newline detection in error_setg() & friends
Commit 5d596c2's regexp assumes the error message string is the first
argument.  Correct for error_report(), wrong for all the others.
Relax the regexp to match newline in anywhere.  This might cause
additional false positives.

While there, update the list of error_reporting functions.

Cc: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:01:08 +02:00
Markus Armbruster
df3c286c53 error: Strip trailing '\n' from error string arguments (again)
Commit 9af9e0f, 6daf194d, be62a2eb and 312fd5f got rid of a bunch, but
they keep coming back.  checkpatch.pl tries to flag them since commit
5d596c2, but it's not very good at it.  Offenders tracked down with
Coccinelle script scripts/coccinelle/err-bad-newline.cocci, an updated
version of the script from commit 312fd5f.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1470224274-31522-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 09:00:44 +02:00
David Gibson
57c0eb1e0d spapr: Fix undefined behaviour in spapr_tce_reset()
When a TCE table (sPAPR IOMMU context) is in disabled state (which is true
by default for the 64-bit window), it has tcet->nb_table == 0 and
tcet->table == NULL.  However, on system reset, spapr_tce_reset() executes,
which unconditionally calls
        memset(tcet->table, 0, table_size);

We get away with this in practice, because it's a zero length memset(),
but memset() on a NULL pointer is undefined behaviour, so we should not
call it in this case.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-08 10:06:25 +10:00
Mark Cave-Ayland
16275edb34 macio: set res_count value to 0 after non-block ATAPI DMA transfers
res_count should be set to the number of outstanding bytes after a DBDMA
request. Unfortunately this wasn't being set to zero by the non-block
transfer codepath meaning drivers that checked the descriptor result for
such requests (e.g reading the CDROM TOC) would assume from a non-zero result
that the transfer had failed.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-08 09:45:03 +10:00
David Gibson
3c0c47e346 spapr: Correctly set query_hotpluggable_cpus hook based on machine version
Prior to c8721d3 "spapr: Error out when CPU hotplug is attempted on older
pseries machines", attempting to use query-hotpluggable-cpus on pseries-2.6
and earlier machine types would SEGV.

That change fixed that, but due to some unexpected interactions in init
order and a brown-paper-bag worthy failure to test, it accidentally
disabled query-hotpluggable-cpus for all pseries machine types, including
the current one which should allow it.

In fact, query_hotpluggable_cpus needs to be non-NULL when and only when
the dr_cpu_enabled flag in sPAPRMachineClass is set, which makes
dr_cpu_enabled itself redundant.

This patch removes dr_cpu_enabled, instead directly setting
query_hotpluggable_cpus from the machine class_init functions, and using
that to determine the availability of CPU hotplug when necessary.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-08 09:45:03 +10:00
Marc-André Lureau
5839df7b71 ahci: fix sglist leak on retry
ahci-test /x86_64/ahci/io/dma/lba28/retry triggers the following leak:

Direct leak of 16 byte(s) in 1 object(s) allocated from:
    #0 0x7fc4b2a25e20 in malloc (/lib64/libasan.so.3+0xc6e20)
    #1 0x7fc4993bce58 in g_malloc (/lib64/libglib-2.0.so.0+0x4ee58)
    #2 0x556a187d4b34 in ahci_populate_sglist hw/ide/ahci.c:896
    #3 0x556a187d8237 in ahci_dma_prepare_buf hw/ide/ahci.c:1367
    #4 0x556a187b5a1a in ide_dma_cb hw/ide/core.c:844
    #5 0x556a187d7eec in ahci_start_dma hw/ide/ahci.c:1333
    #6 0x556a187b650b in ide_start_dma hw/ide/core.c:921
    #7 0x556a187b61e6 in ide_sector_start_dma hw/ide/core.c:911
    #8 0x556a187b9e26 in cmd_write_dma hw/ide/core.c:1486
    #9 0x556a187bd519 in ide_exec_cmd hw/ide/core.c:2027
    #10 0x556a187d71c5 in handle_reg_h2d_fis hw/ide/ahci.c:1204
    #11 0x556a187d7681 in handle_cmd hw/ide/ahci.c:1254
    #12 0x556a187d168a in check_cmd hw/ide/ahci.c:510
    #13 0x556a187d0afc in ahci_port_write hw/ide/ahci.c:314
    #14 0x556a187d105d in ahci_mem_write hw/ide/ahci.c:435
    #15 0x556a1831d959 in memory_region_write_accessor /home/elmarco/src/qemu/memory.c:525
    #16 0x556a1831dc35 in access_with_adjusted_size /home/elmarco/src/qemu/memory.c:591
    #17 0x556a18323ce3 in memory_region_dispatch_write /home/elmarco/src/qemu/memory.c:1262
    #18 0x556a1828cf67 in address_space_write_continue /home/elmarco/src/qemu/exec.c:2578
    #19 0x556a1828d20b in address_space_write /home/elmarco/src/qemu/exec.c:2635
    #20 0x556a1828d92b in address_space_rw /home/elmarco/src/qemu/exec.c:2737
    #21 0x556a1828daf7 in cpu_physical_memory_rw /home/elmarco/src/qemu/exec.c:2746
    #22 0x556a183068d3 in cpu_physical_memory_write /home/elmarco/src/qemu/include/exec/cpu-common.h:72
    #23 0x556a18308194 in qtest_process_command /home/elmarco/src/qemu/qtest.c:382
    #24 0x556a18309999 in qtest_process_inbuf /home/elmarco/src/qemu/qtest.c:573
    #25 0x556a18309a4a in qtest_read /home/elmarco/src/qemu/qtest.c:585
    #26 0x556a18598b85 in qemu_chr_be_write_impl /home/elmarco/src/qemu/qemu-char.c:387
    #27 0x556a18598c52 in qemu_chr_be_write /home/elmarco/src/qemu/qemu-char.c:399
    #28 0x556a185a2afa in tcp_chr_read /home/elmarco/src/qemu/qemu-char.c:2902
    #29 0x556a18cbaf52 in qio_channel_fd_source_dispatch io/channel-watch.c:84

Follow John Snow recommendation:
  Everywhere else ncq_err is used, it is accompanied by a list cleanup
  except for ncq_cb, which is the case you are fixing here.

  Move the sglist destruction inside of ncq_err and then delete it from
  the other two locations to keep it tidy.

  Call dma_buf_commit in ide_dma_cb after the early return. Though, this
  is also a little wonky because this routine does more than clear the
  list, but it is at the moment the centralized "we're done with the
  sglist" function and none of the other side effects that occur in
  dma_buf_commit will interfere with the reset that occurs from
  ide_restart_bh, I think

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2016-08-08 00:00:41 +04:00
Marc-André Lureau
9ef617246b usb: free leaking path
qdev_get_dev_path() returns an allocated string, free it when no longer
needed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-08 00:00:36 +04:00
Marc-André Lureau
ec507f1123 usb: free USBDevice.strings
The list is created during instance init and further populated with
usb_desc_set_string(). Clear it when unrealizing the device.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-08 00:00:32 +04:00
Marc-André Lureau
0137a557aa virtio-input: free config list
Clear the list when finalizing. The list is created during realize with
virtio_input_idstr_config() and later by further calls to
virtio_input_init_config() and virtio_input_add_config().

This leak can be reproduced with device-introspect-test -p
/x86_64/device/introspect/concrete.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-08 00:00:28 +04:00
Marc-André Lureau
df37dd6ffe qjson: free str
Release the qstring allocated in qjson_new().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 00:00:24 +04:00
Marc-André Lureau
9d324b0e67 ahci: free irqs array
Each irq is referenced by the IDEBus in ide_init2(), thus we can free
the no longer used array.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Acked-by: John Snow <jsnow@redhat.com>
2016-08-08 00:00:20 +04:00
Marc-André Lureau
1371a36936 char: free MuxDriver when closing
Similarly to other chr_close callbacks, free char type specific data.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-08 00:00:15 +04:00
Marc-André Lureau
5b498459b4 char: free the tcp connection data when closing
Make sure the connection data got freed when closing the chardev, to
avoid leaks. Introduce tcp_chr_free_connection() to clean all connection
related data, and move some tcp_chr_close() clean-ups there.

(while at it, set write_msgfds_num to 0 when clearing array in
tcp_set_msgfds())

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-08 00:00:11 +04:00
Marc-André Lureau
157e94e8a2 numa: do not leak NumaOptions
In all cases, call qapi_free_NumaOptions(), by using a common ending
block.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2016-08-07 23:59:59 +04:00
Richard Henderson
5a18407f55 tcg: Lower indirect registers in a separate pass
Rather than rely on recursion during the middle of register allocation,
lower indirect registers to loads and stores off the indirect base into
plain temps.

For an x86_64 host, with sufficient registers, this results in identical
code, modulo the actual register assignments.

For an i686 host, with insufficient registers, this means that temps can
be (temporarily) spilled to the stack in order to satisfy an allocation.
This as opposed to the possibility of not being able to spill, to allocate
a register for the indirect base, in order to perform a spill.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
c0ef05b5e6 tcg: Require liveness analysis
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
bdfb460ef7 tcg: Include liveness info in the dumps
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
c70fbf0a99 tcg: Compress dead_temps and mem_temps into a single array
We only need two bits per temporary.  Fold the two bytes into one,
and reduce the memory and cachelines required during compilation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
bee158cb4d tcg: Fold life data into TCGOp
Reduce the size of other bitfields to make room.
This reduces the cache footprint of compilation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:40 +05:30
Richard Henderson
dcb8e75870 tcg: Reorg TCGOp chaining
Instead of using -1 as end of chain, use 0, and link through the 0
entry as a fully circular double-linked list.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:18 +05:30
Richard Henderson
a1b3c48d2b tcg: Compress liveness data to 16 bits
This reduces both memory usage and per-insn cacheline usage
during code generation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-08-05 21:44:17 +05:30
Peter Maydell
51009170d8 tests: Rename qtests which have names ending "error"
We have three qtest tests which have test names ending with "error".
This is awkward because the output of verbose test runs looks like
  /crypto/task/error:                                                  OK
  /crypto/task/thread_error:                                           OK

which gives false positives if you are grepping build logs for
errors by looking for "error:". Since there are only three tests
with this problem, just rename them all to 'failure' instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470307178-22848-1-git-send-email-peter.maydell@linaro.org
2016-08-05 15:27:15 +01:00
Stefan Weil
c025f689a1 wxx: Fix handling of files used for character devices
On Windows, such files were not truncated like on all other hosts.
Now we also test whether truncation is needed when running on Windows.

The append case was also incorrect because it needs a different value
for the desired access mode.

Reported-by: Benjamin David Lunt <fys@fysnet.net>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1470114877-1466-1-git-send-email-sw@weilnetz.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 14:15:14 +01:00
Peter Maydell
bd8eda537f Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches for 2.7.0-rc2

# gpg: Signature made Fri 05 Aug 2016 10:30:12 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  nvme: bump PCI revision
  nvme: fix identify to be NVMe 1.1 compliant
  block: Accept any target node for transactional blockdev-backup

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 13:05:29 +01:00
Peter Maydell
8bfa87a231 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Fri 05 Aug 2016 10:24:34 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  virtio-blk: Remove stale comment about draining
  virtio-blk: Release s->rq queue at system_reset
  throttle: Test burst limits lower than the normal limits
  throttle: Don't allow burst limits to be lower than the normal limits
  block/parallels: check new image size

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 11:44:56 +01:00
Peter Maydell
fbe400d246 Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Fri 05 Aug 2016 09:58:50 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker: Add "--enable-werror" to configure command line
  docker: Be compatible with older docker

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 11:16:30 +01:00
Peter Maydell
676e844d56 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-20160804-tag' into staging
Xen 2016/08/04

# gpg: Signature made Thu 04 Aug 2016 18:43:14 BST
# gpg:                using RSA key 0x894F8F4870E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"
# Primary key fingerprint: D04E 33AB A51F 67BA 07D3  0AEA 894F 8F48 70E1 AE90

* remotes/sstabellini/tags/xen-20160804-tag:
  Xen PCI passthrough: fix passthrough failure when no interrupt pin

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-05 10:23:12 +01:00
Fam Zheng
27d1b87688 virtio-blk: Remove stale comment about draining
This is stale after commit 6e40b3bf (virtio-blk: Use blk_drain() to
drain IO requests), remove it.

Suggested-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1470278654-13525-3-git-send-email-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Fam Zheng
26307f6aa4 virtio-blk: Release s->rq queue at system_reset
At system_reset, there is no point in retrying the queued request,
because the driver that issued the request won't be around any more.

Analyzed-by: Laszlo Ersek <lersek@redhat.com>
Reported-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1470278654-13525-2-git-send-email-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Alberto Garcia
5fc8c052ce throttle: Test burst limits lower than the normal limits
This checks that making FOO_max lower than FOO is not allowed.

We could also forbid having FOO_max == FOO, but that doesn't have
any odd side effects and it would require us to update several other
tests, so let's keep it simple.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 2f90f9ee58aa14b7bd985f67c5996b06e0ab6c19.1469693110.git.berto@igalia.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Alberto Garcia
aaa1e77ffa throttle: Don't allow burst limits to be lower than the normal limits
Setting FOO_max to a value that is lower than FOO does not make
sense, and it produces odd results depending on the value of
FOO_max_length. Although the user should not set that configuration
in the first place it's better to reject it explicitly.

https://bugzilla.redhat.com/show_bug.cgi?id=1355665

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reported-by: Gu Nini <ngu@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 663d5aca406060e31f80d8113f77b6feee63b919.1469693110.git.berto@igalia.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Klim Kireev
555a608c5d block/parallels: check new image size
Before this patch incorrect image could be created via qemu-img
(Example: qemu-img create -f parallels -o size=4096T hack.img),
incorrect images cannot be used due to overflow in main image structure.

This patch add check of size in image creation.

After reading size it compare it with UINT32_MAX * cluster_size.

Signed-off-by: Klim Kireev <proffk@virtuozzo.mipt.ru>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1469639300-12155-1-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 09:59:06 +01:00
Christoph Hellwig
47989f1447 nvme: bump PCI revision
The broken Identify implementation in earlier Qemu versions means we
need to blacklist it from issueing the NVMe 1.1 Identify Namespace List
command.  As we want to be able to use it in newer Qemu versions we need
a way to identify those.  Bump the PCI revision as a guest visible
indicator of this bug fix.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-05 10:56:08 +02:00
Christoph Hellwig
03035a23a3 nvme: fix identify to be NVMe 1.1 compliant
NVMe 1.1 requires devices to implement a Namespace List subcommand of
the identify command.  Qemu not only not implements this features, but
also misinterprets it as an Identify Controller request.  Due to this
any OS trying to use the Namespace List will fail the probe.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-08-05 10:55:52 +02:00
Kevin Wolf
39d990ac38 block: Accept any target node for transactional blockdev-backup
Commit 0d978913 changed blockdev-backup to accept arbitrary node names
instead of device names (i.e. root nodes) for the backup target.
However, it forgot to make the same change in transactions and to update
the documentation. This patch fixes these omissions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-08-05 10:55:14 +02:00
Fam Zheng
4a93f78ed0 docker: Add "--enable-werror" to configure command line
We don't have .git in the docker checkout, add this to enable -Werror
explicitly.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1469453510-658-1-git-send-email-famz@redhat.com
2016-08-05 16:34:55 +08:00
Fam Zheng
95d203cd1e docker: Be compatible with older docker
By not using "--format" with docker images command.

The option is not available on RHEL 7 docker command. Use an awk
matching command instead.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470202928-3392-1-git-send-email-famz@redhat.com>
2016-08-05 16:34:52 +08:00
Bruce Rogers
0968c91ce0 Xen PCI passthrough: fix passthrough failure when no interrupt pin
Commit 5a11d0f7 mistakenly converted a log message into an error
condition when no pin interrupt is found for the pci device being
passed through. Revert that part of the commit.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
2016-08-04 10:42:48 -07:00
Peter Maydell
42e0d60f16 Merge remote-tracking branch 'remotes/riku/tags/pull-linux-user-20160804' into staging
linux-user important fixes for 2.7

# gpg: Signature made Thu 04 Aug 2016 15:10:57 BST
# gpg:                using RSA key 0xB44890DEDE3C9BC0
# gpg: Good signature from "Riku Voipio <riku.voipio@iki.fi>"
# gpg:                 aka "Riku Voipio <riku.voipio@linaro.org>"
# Primary key fingerprint: FF82 03C8 C391 98AE 0581  41EF B448 90DE DE3C 9BC0

* remotes/riku/tags/pull-linux-user-20160804:
  linux-user: Handle brk() attempts with very large sizes
  linux-user: Fix target_semid_ds structure definition
  linux-user: Don't write off end of new_utsname buffer
  linux-user: Fix memchr() argument in open_self_cmdline()
  linux-user: Use correct alignment for long long on i386 guests

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-04 18:36:05 +01:00
Peter Maydell
ef4330c23b linux-user: Handle brk() attempts with very large sizes
In do_brk(), we were inadvertently truncating the size
of a requested brk() from the guest by putting it into an
'int' variable. This meant that we would incorrectly report
success back to the guest rather than a failed allocation,
typically resulting in the guest then segfaulting. Use
abi_ulong instead.

This fixes a crash in the '31370.cc' test in the gcc libstdc++ test
suite (the test case starts by trying to allocate a very large
size and reduces the size until the allocation succeeds).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:38:17 +03:00
Peter Maydell
005eb2ae1f linux-user: Fix target_semid_ds structure definition
The target_semid_ds structure is not correct for all
architectures: the padding fields should only exist for:
 * 32-bit ABIs
 * x86

It is also misnamed, since it is following the kernel
semid64_ds structure (QEMU doesn't support the legacy
semid_ds structure at all). Rename the struct, provide
a correct generic definition and allow the oddball x86
architecture to provide its own version.

This fixes broken SYSV semaphores for all our 64-bit
architectures except x86 and ppc.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:36:53 +03:00
Peter Maydell
332c9781f6 linux-user: Don't write off end of new_utsname buffer
Use g_strlcpy() rather than strcpy() to copy the uname string
into the structure we return to the guest for the uname syscall.
This avoids overrunning the buffer if the user passed us an
overlong string via the QEMU command line.

We fix a comment typo while we're in the neighbourhood.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:36:26 +03:00
Peter Maydell
ba4b3f668a linux-user: Fix memchr() argument in open_self_cmdline()
In open_self_cmdline() we look for a 0 in the buffer we read
from /prc/self/cmdline. We were incorrectly passing the length
of our buf[] array to memchr() as the length to search, rather
than the number of bytes we actually read into it, which could
be shorter. This was spotted by Coverity (because it could
result in our trying to pass a negative length argument to
write()).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:35:30 +03:00
Peter Maydell
d9fe91d868 linux-user: Use correct alignment for long long on i386 guests
For i386, the ABI specifies that 'long long' (8 byte values)
need only be 4 aligned, but we were requiring them to be
8-aligned. This meant we were laying out the target_epoll_event
structure wrongly. Add a suitable ifdef to abitypes.h to
specify the i386-specific alignment requirement.

Reported-by: Icenowy Zheng <icenowy@aosc.xyz>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-08-04 16:34:59 +03:00
Peter Maydell
09704e6ded Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* xsetbv fix (x86 targets TCG)
* remove unused functions
* qht segfault and memory leak fixes
* NBD fixes
* Fix for non-power-of-2 discard granularity
* Memory hotplug fixes
* Migration regressions
* IOAPIC fixes and (disabled by default) EOI register support
* Various other small fixes

# gpg: Signature made Wed 03 Aug 2016 18:01:05 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (25 commits)
  util: Fix assertion in iov_copy() upon zero 'bytes' and non-zero 'offset'
  qdev: Fix use after free in qdev_init_nofail error path
  Reorganize help output of '-display' option
  x86: ioapic: add support for explicit EOI
  x86: ioapic: ignore level irq during processing
  apic: fix broken migration for kvm-apic
  fw_cfg: Make base type "fw_cfg" abstract
  block: Cater to iscsi with non-power-of-2 discard
  osdep: Document differences in rounding macros
  nbd: Limit nbdflags to 16 bits
  nbd: Fix bad flag detection on server
  i2c: fix migration regression introduced by broadcast support
  mptsas: really fix migration compatibility
  qdist: return "(empty)" instead of NULL when printing an empty dist
  qdist: use g_renew and g_new instead of g_realloc and g_malloc.
  qdist: fix memory leak during binning
  target-i386: fix typo in xsetbv implementation
  qht: do not segfault when gathering stats from an uninitialized qht
  util: Drop inet_listen()
  util: drop unix_nonblocking_connect()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-04 10:24:27 +01:00
Peter Maydell
29b2517ac7 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vnc-20160803-1' into staging
vnc: fixes for "-vnc none".

# gpg: Signature made Wed 03 Aug 2016 16:33:07 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vnc-20160803-1:
  vnc: ensure connection sharing/limits is always configured
  vnc: fix crash when vnc_server_info_get has an error
  vnc: don't crash getting server info if lsock is NULL

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 18:22:51 +01:00
Shmulik Ladkani
e911765cbb util: Fix assertion in iov_copy() upon zero 'bytes' and non-zero 'offset'
In cases where iov_copy() is passed with zero 'bytes' argument and a
non-zero 'offset' argument, nothing gets copied - as expected.

However no copy iterations are performed, so 'offset' is left
unaltered, leading to the final assert(offset == 0) to fail.

Instead, change the loop condition to continue as long as 'offset || bytes',
similar to other iov_* functions.

This ensures 'offset' gets zeroed (even if no actual copy is made),
unless it is beyond end of source iov - which is asserted.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
Message-Id: <1470130880-1050-1-git-send-email-shmulik.ladkani@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Fam Zheng
0d4104e576 qdev: Fix use after free in qdev_init_nofail error path
Since 69382d8b (qdev: Fix object reference leak in case device.realize()
fails), object_property_set_bool could release the object. The error
path wants the type name, so hold an reference before realizing it.

Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1470109301-12966-1-git-send-email-famz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Robert Ho
f04ec5afbb Reorganize help output of '-display' option
The '-display' help information is not very correct. This patch sort
it a little.
Also, in its help information, reveals what implicit display option
will be chosen if no definition.

Signed-off-by: Robert Ho <robert.hu@intel.com>
Message-Id: <1469528231-26206-1-git-send-email-robert.hu@intel.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Peter Xu
20fd4b7b6d x86: ioapic: add support for explicit EOI
Some old Linux kernels (upstream before v4.0), or any released RHEL
kernels has problem in sending APIC EOI when IR is enabled. Meanwhile,
many of them only support explicit EOI for IOAPIC, which is only
introduced in IOAPIC version 0x20. This patch provide a way to boost
QEMU IOAPIC to version 0x20, in order for QEMU to correctly receive EOI
messages.

Without boosting IOAPIC version to 0x20, kernels before commit d32932d
("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces")
will have trouble enabling both IR and level-triggered interrupt devices
(like e1000).

To upgrade IOAPIC to version 0x20, we need to specify:

  -global ioapic.version=0x20

To be compatible with old systems, 0x11 will still be the default IOAPIC
version. Here 0x11 and 0x20 are the only versions to be supported.

One thing to mention: this patch only applies to emulated IOAPIC. It
does not affect kernel IOAPIC behavior.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1470059959-372-1-git-send-email-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Peter Xu
f99b86b949 x86: ioapic: ignore level irq during processing
For level triggered interrupts, we will get Remote IRR bit cleared after
guest kernel finished processing specific request. Before that, we
should ignore the same interrupt from triggering again.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1469974685-4144-1-git-send-email-peterx@redhat.com>
[Push new "if" up so that it covers KVM split irqchip as well. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Igor Mammedov
7298d4fd51 apic: fix broken migration for kvm-apic
commit f6e98444 (apic: Use apic_id as apic's migration instance_id)
breaks migration when in kernel irqchip is used for 2.6 and older
machine types.

It applies compat property only for userspace 'apic' type
instead of applying it to all apic types inherited from
'apic-common' type as it was supposed to do.

Fix it by setting compat property 'legacy-instance-id' for
'apic-common' type which affects inherited types (i.e. not
only 'apic' but also 'kvm-apic' types)

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1469800542-11402-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Markus Armbruster
e061fa3ca9 fw_cfg: Make base type "fw_cfg" abstract
Missed when commit 5712db6 split off "fw_cfg_io" and "fw_cfg_mem".

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1469777353-9383-1-git-send-email-armbru@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Eric Blake
b8d0a9804d block: Cater to iscsi with non-power-of-2 discard
Dell Equallogic iSCSI SANs have a very unusual advertised geometry:

$ iscsi-inq -e 1 -c $((0xb0)) iscsi://XXX/0
wsnz:0
maximum compare and write length:1
optimal transfer length granularity:0
maximum transfer length:0
optimal transfer length:0
maximum prefetch xdread xdwrite transfer length:0
maximum unmap lba count:30720
maximum unmap block descriptor count:2
optimal unmap granularity:30720
ugavalid:1
unmap granularity alignment:0
maximum write same length:30720

which says that both the maximum and the optimal discard size
is 15M.  It is not immediately apparent if the device allows
discard requests not aligned to the optimal size, nor if it
allows discards at a finer granularity than the optimal size.

I tried to find details in the SCSI Commands Reference Manual
Rev. A on what valid values of maximum and optimal sizes are
permitted, but while that document mentions a "Block Limits
VPD Page", I couldn't actually find documentation of that page
or what values it would have, or if a SCSI device has an
advertisement of its minimal unmap granularity.  So it is not
obvious to me whether the Dell Equallogic device is compliance
with the SCSI specification.

Fortunately, it is easy enough to support non-power-of-2 sizing,
even if it means we are less efficient than truly possible when
targetting that device (for example, it means that we refuse to
unmap anything that is not a multiple of 15M and aligned to a
15M boundary, even if the device truly does support a smaller
granularity where unmapping actually works).

Reported-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1469129688-22848-5-git-send-email-eblake@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:57 +02:00
Eric Blake
e9fd416e66 osdep: Document differences in rounding macros
Make it obvious which macros are safe in which situations.

Useful since QEMU_ALIGN_UP and ROUND_UP both purport to do
the same thing, but differ on whether the alignment must be
a power of 2.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1469129688-22848-4-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Eric Blake
7423f41782 nbd: Limit nbdflags to 16 bits
Rather than asserting that nbdflags is within range, just give
it the correct type to begin with :)  nbdflags corresponds to
the per-export portion of NBD Protocol "transmission flags", which
is 16 bits in response to NBD_OPT_EXPORT_NAME and NBD_OPT_GO.

Furthermore, upstream NBD has never passed the global flags to
the kernel via ioctl(NBD_SET_FLAGS) (the ioctl was first
introduced in NBD 2.9.22; then a latent bug in NBD 3.1 actually
tried to OR the global flags with the transmission flags, with
the disaster that the addition of NBD_FLAG_NO_ZEROES in 3.9
caused all earlier NBD 3.x clients to treat every export as
read-only; NBD 3.10 and later intentionally clip things to 16
bits to pass only transmission flags).  Qemu should follow suit,
since the current two global flags (NBD_FLAG_FIXED_NEWSTYLE
and NBD_FLAG_NO_ZEROES) have no impact on the kernel's behavior
during transmission.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>

Message-Id: <1469129688-22848-3-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Eric Blake
5bee0f4717 nbd: Fix bad flag detection on server
Commit ab7c548e added a check for invalid flags, but used an
early return on error instead of properly going through the
cleanup label.

Signed-off-by: Eric Blake <eblake@redhat.com>

Message-Id: <1469129688-22848-2-git-send-email-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Igor Mammedov
71ae65e552 i2c: fix migration regression introduced by broadcast support
QEMU fails migration with following error:

qemu-system-x86_64: Missing section footer for i2c_bus
qemu-system-x86_64: load of migration failed: Invalid argument

when migrating from:
  qemu-system-x86_64-v2.6.0 -m 256M rhel72.img -M pc-i440fx-2.6
to
  qemu-system-x86_64-v2.7.0-rc0 -m 256M rhel72.img -M pc-i440fx-2.6

Regression is added by commit 2293c27f (i2c: implement broadcast write)

Fix it by dropping 'broadcast' VMState introduced by 2293c27f and
reuse broadcast 0x00 address as broadcast flag in bus->saved_address.
Then if there were ongoing broadcast at migration time, set
bus->saved_address to it and at i2c_slave_post_load() time check
for it instead of transfering and using 'broadcast' VMState.

As result of reusing existing saved_address VMState, no compat
glue will be needed to keep forward/backward compatiblity. which
makes fix much less intrusive.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1469623198-177227-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Paolo Bonzini
0b646f44d9 mptsas: really fix migration compatibility
Commit 2e2aa316 removed internal flag msi_in_use, but it
existed in vmstate.  Restore it for migration to older QEMU
versions.

Reported-by: Amit Shah <amit.shah@redhat.com>
Suggested-by: Amit Shah <amit.shah@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Cc: Cao jin <caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Emilio G. Cota
11b7b07f8a qdist: return "(empty)" instead of NULL when printing an empty dist
Printf'ing a NULL string is undefined behaviour. Avoid it.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469459025-23606-4-git-send-email-cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:44:56 +02:00
Emilio G. Cota
071d405477 qdist: use g_renew and g_new instead of g_realloc and g_malloc.
This is safer against overflow.  g_renew is available in all
version of glib, while g_realloc_n is only available in 2.24.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469459025-23606-3-git-send-email-cota@braap.org>
[Rewritten to use g_new/g_renew. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-03 18:42:35 +02:00
Peter Maydell
6eac5f7bad Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160803-1' into staging
usb: bugfixes for xen-usb and ehci, mingw build fix.

# gpg: Signature made Wed 03 Aug 2016 14:04:26 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20160803-1:
  xen: use a common function for pv and hvm guest backend register calls
  xen: drain submit queue in xen-usb before removing device
  xen: when removing a backend don't remove many of them
  ehci: faster frame index calculation for skipped frames
  wxx: Fix compilation of host-libusb.c
  wxx: Fix compiler warning for host-libusb.c

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 14:25:10 +01:00
Daniel P. Berrange
12e29b1682 vnc: ensure connection sharing/limits is always configured
The connection sharing / limits are only set in the
vnc_display_open() method and so missed when VNC is running
with '-vnc none'. This in turn prevents clients being added
to the VNC server with the QMP "add_client" command.

This was introduced in

  commit e5f34cdd2d
  Author: Gerd Hoffmann <kraxel@redhat.com>
  Date:   Thu Oct 2 12:09:34 2014 +0200

      vnc: track & limit connections

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470134726-15697-4-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 15:06:32 +02:00
Daniel P. Berrange
3e7f136d8b vnc: fix crash when vnc_server_info_get has an error
The vnc_server_info_get will allocate the VncServerInfo
struct and then call vnc_init_basic_info_from_server_addr
to populate the basic fields. If this returns an error
though, the qapi_free_VncServerInfo call will then crash
because the VncServerInfo struct instance was not properly
NULL-initialized and thus contains random stack garbage.

 #0  0x00007f1987c8e6f5 in raise () at /lib64/libc.so.6
 #1  0x00007f1987c902fa in abort () at /lib64/libc.so.6
 #2  0x00007f1987ccf600 in __libc_message () at /lib64/libc.so.6
 #3  0x00007f1987cd7d4a in _int_free () at /lib64/libc.so.6
 #4  0x00007f1987cdb2ac in free () at /lib64/libc.so.6
 #5  0x00007f198b654f6e in g_free () at /lib64/libglib-2.0.so.0
 #6  0x0000559193cdcf54 in visit_type_str (v=v@entry=
     0x5591972f14b0, name=name@entry=0x559193de1e29 "host", obj=obj@entry=0x5591961dbfa0, errp=errp@entry=0x7fffd7899d80)
     at qapi/qapi-visit-core.c:255
 #7  0x0000559193cca8f3 in visit_type_VncBasicInfo_members (v=v@entry=
     0x5591972f14b0, obj=obj@entry=0x5591961dbfa0, errp=errp@entry=0x7fffd7899dc0) at qapi-visit.c:12307
 #8  0x0000559193ccb523 in visit_type_VncServerInfo_members (v=v@entry=
     0x5591972f14b0, obj=0x5591961dbfa0, errp=errp@entry=0x7fffd7899e00) at qapi-visit.c:12632
 #9  0x0000559193ccb60b in visit_type_VncServerInfo (v=v@entry=
     0x5591972f14b0, name=name@entry=0x0, obj=obj@entry=0x7fffd7899e48, errp=errp@entry=0x0) at qapi-visit.c:12658
 #10 0x0000559193cb53d8 in qapi_free_VncServerInfo (obj=<optimized out>) at qapi-types.c:3970
 #11 0x0000559193c1e6ba in vnc_server_info_get (vd=0x7f1951498010) at ui/vnc.c:233
 #12 0x0000559193c24275 in vnc_connect (vs=0x559197b2f200, vs=0x559197b2f200, event=QAPI_EVENT_VNC_CONNECTED) at ui/vnc.c:284
 #13 0x0000559193c24275 in vnc_connect (vd=vd@entry=0x7f1951498010, sioc=sioc@entry=0x559196bf9c00, skipauth=skipauth@entry=tru e, websocket=websocket@entry=false) at ui/vnc.c:3039
 #14 0x0000559193c25806 in vnc_display_add_client (id=<optimized out>, csock=<optimized out>, skipauth=<optimized out>)
     at ui/vnc.c:3877
 #15 0x0000559193a90c28 in qmp_marshal_add_client (args=<optimized out>, ret=<optimized out>, errp=0x7fffd7899f90)
     at qmp-marshal.c:105
 #16 0x000055919399c2b7 in handle_qmp_command (parser=<optimized out>, tokens=<optimized out>)
     at /home/berrange/src/virt/qemu/monitor.c:3971
 #17 0x0000559193ce3307 in json_message_process_token (lexer=0x559194ab0838, input=0x559194a6d940, type=JSON_RCURLY, x=111, y=1 2) at qobject/json-streamer.c:105
 #18 0x0000559193cfa90d in json_lexer_feed_char (lexer=lexer@entry=0x559194ab0838, ch=125 '}', flush=flush@entry=false)
     at qobject/json-lexer.c:319
 #19 0x0000559193cfaa1e in json_lexer_feed (lexer=0x559194ab0838, buffer=<optimized out>, size=<optimized out>)
     at qobject/json-lexer.c:369
 #20 0x0000559193ce33c9 in json_message_parser_feed (parser=<optimized out>, buffer=<optimized out>, size=<optimized out>)
     at qobject/json-streamer.c:124
 #21 0x000055919399a85b in monitor_qmp_read (opaque=<optimized out>, buf=<optimized out>, size=<optimized out>)
     at /home/berrange/src/virt/qemu/monitor.c:3987
 #22 0x0000559193a87d00 in tcp_chr_read (chan=<optimized out>, cond=<optimized out>, opaque=0x559194a7d900)
     at qemu-char.c:2895
 #23 0x00007f198b64f703 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
 #24 0x0000559193c484b3 in main_loop_wait () at main-loop.c:213
 #25 0x0000559193c484b3 in main_loop_wait (timeout=<optimized out>) at main-loop.c:258
 #26 0x0000559193c484b3 in main_loop_wait (nonblocking=<optimized out>) at main-loop.c:506
 #27 0x0000559193964c55 in main () at vl.c:1908
 #28 0x0000559193964c55 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4603

This was introduced in

  commit 98481bfcd6
  Author: Eric Blake <eblake@redhat.com>
  Date:   Mon Oct 26 16:34:45 2015 -0600

    vnc: Hoist allocation of VncBasicInfo to callers

which added error reporting for vnc_init_basic_info_from_server_addr
but didn't change the g_malloc calls to g_malloc0.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470134726-15697-3-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 15:06:32 +02:00
Daniel P. Berrange
624cdd46d7 vnc: don't crash getting server info if lsock is NULL
When VNC is started with '-vnc none' there will be no
listener socket present. When we try to populate the
VncServerInfo we'll crash accessing a NULL 'lsock'
field.

 #0  qio_channel_socket_get_local_address (ioc=0x0, errp=errp@entry=0x7ffd5b8aa0f0) at io/channel-socket.c:33
 #1  0x00007f4b9a297d6f in vnc_init_basic_info_from_server_addr (errp=0x7ffd5b8aa0f0, info=0x7f4b9d425460, ioc=<optimized out>)  at ui/vnc.c:146
 #2  vnc_server_info_get (vd=0x7f4b9e858000) at ui/vnc.c:223
 #3  0x00007f4b9a29d318 in vnc_qmp_event (vs=0x7f4b9ef82000, vs=0x7f4b9ef82000, event=QAPI_EVENT_VNC_CONNECTED) at ui/vnc.c:279
 #4  vnc_connect (vd=vd@entry=0x7f4b9e858000, sioc=sioc@entry=0x7f4b9e8b3a20, skipauth=skipauth@entry=true, websocket=websocket @entry=false) at ui/vnc.c:2994
 #5  0x00007f4b9a29e8c8 in vnc_display_add_client (id=<optimized out>, csock=<optimized out>, skipauth=<optimized out>) at ui/v nc.c:3825
 #6  0x00007f4b9a18d8a1 in qmp_marshal_add_client (args=<optimized out>, ret=<optimized out>, errp=0x7ffd5b8aa230) at qmp-marsh al.c:123
 #7  0x00007f4b9a0b53f5 in handle_qmp_command (parser=<optimized out>, tokens=<optimized out>) at /usr/src/debug/qemu-2.6.0/mon itor.c:3922
 #8  0x00007f4b9a348580 in json_message_process_token (lexer=0x7f4b9c78dfe8, input=0x7f4b9c7350e0, type=JSON_RCURLY, x=111, y=5 9) at qobject/json-streamer.c:94
 #9  0x00007f4b9a35cfeb in json_lexer_feed_char (lexer=lexer@entry=0x7f4b9c78dfe8, ch=125 '}', flush=flush@entry=false) at qobj ect/json-lexer.c:310
 #10 0x00007f4b9a35d0ae in json_lexer_feed (lexer=0x7f4b9c78dfe8, buffer=<optimized out>, size=<optimized out>) at qobject/json -lexer.c:360
 #11 0x00007f4b9a348679 in json_message_parser_feed (parser=<optimized out>, buffer=<optimized out>, size=<optimized out>) at q object/json-streamer.c:114
 #12 0x00007f4b9a0b3a1b in monitor_qmp_read (opaque=<optimized out>, buf=<optimized out>, size=<optimized out>) at /usr/src/deb ug/qemu-2.6.0/monitor.c:3938
 #13 0x00007f4b9a186751 in tcp_chr_read (chan=<optimized out>, cond=<optimized out>, opaque=0x7f4b9c7add40) at qemu-char.c:2895
 #14 0x00007f4b92b5c79a in g_main_context_dispatch () from /lib64/libglib-2.0.so.0
 #15 0x00007f4b9a2bb0c0 in glib_pollfds_poll () at main-loop.c:213
 #16 os_host_main_loop_wait (timeout=<optimized out>) at main-loop.c:258
 #17 main_loop_wait (nonblocking=<optimized out>) at main-loop.c:506
 #18 0x00007f4b9a0835cf in main_loop () at vl.c:1934
 #19 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4667

Do an upfront check for a NULL lsock and report an error to
the caller, which matches behaviour from before

  commit 04d2529da2
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Fri Feb 27 16:20:57 2015 +0000

    ui: convert VNC server to use QIOChannelSocket

where getsockname() would be given a FD value -1 and thus report
an error to the caller.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1470134726-15697-2-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 15:06:32 +02:00
Juergen Gross
0e39bb022b xen: use a common function for pv and hvm guest backend register calls
Instead of calling xen_be_register() for each supported backend type
for hvm and pv guests in their machine init functions use a common
function in order not to have to add new backends twice.

This at once fixes the error that hvm domains couldn't use the qusb
backend.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Message-id: 1470119552-16170-1-git-send-email-jgross@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 14:52:11 +02:00
Peter Maydell
0cb34ff32e Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-next-2016-08-03-v1' into staging
Merge qio-next 2016-08-03 v1

# gpg: Signature made Wed 03 Aug 2016 10:48:08 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qio-next-2016-08-03-v1:
  io: remove mistaken call to object_ref on QTask

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 13:43:28 +01:00
Peter Maydell
90f54472f4 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160803' into staging
qemu-2.7: ppc patch queue 2016-08-03

Here's the current set of patches (only 2) for spapr, ppc and related
things.  These are important bugfixes for the stabilizing 2.7 tree.

One is for a regression where confusion between x86 only and generic
KVM irq handling resulted in breakage on KVM/Power.  The other is
fixing (yet another) problem in the vcpu hotplug code: older pseries
machine types which don't support vcpu hotplug weren't correctly
advertising that, potentially leading to crashes or other problems.

# gpg: Signature made Wed 03 Aug 2016 06:23:40 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160803:
  kvm-irqchip: only commit route when irqchip is used
  spapr: Error out when CPU hotplug is attempted on older pseries machines

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-03 11:52:53 +01:00
Daniel P. Berrange
bc35d51077 io: remove mistaken call to object_ref on QTask
The QTask struct is just a standalone struct, not a QOM Object,
so calling object_ref() on it is not appropriate. This results
in mangling the 'destroy' field in the QTask struct, causing
the later call to qtask_free() to try to call the function
at address 0x1, with predictably segfault happy results.

There is in fact no need for ref counting with QTask, as the
call to qtask_abort() or qtask_complete() will automatically
free associated memory.

This fixes the crash shown in

  https://bugs.launchpad.net/qemu/+bug/1589923

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-08-03 10:28:50 +01:00
Juergen Gross
80440ea033 xen: drain submit queue in xen-usb before removing device
When unplugging a device in the Xen pvusb backend drain the submit
queue before deallocation of the control structures. Otherwise there
will be bogus memory accesses when I/O contracts are finished.

Correlated to this issue is the handling of cancel requests: a packet
cancelled will still lead to the call of complete, so add a flag
to the request indicating it should be just dropped on complete.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Anthony PERARD <anthony.perard@citrix.com>
Message-id: 1470140044-16492-3-git-send-email-jgross@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 10:29:10 +02:00
Juergen Gross
c8e36e865c xen: when removing a backend don't remove many of them
When a Xenstore watch fires indicating a backend has to be removed
don't remove all backends for that domain with the specified device
index, but just the one which has the correct type.

The easiest way to achieve this is to use the already determined
xendev as parameter for xen_be_del_xendev() instead of only the domid
and device index.

This at once removes the open coded QTAILQ_FOREACH_SAVE() in
xen_be_del_xendev() as there is no need to search for the correct
xendev any longer.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Message-id: 1470140044-16492-2-git-send-email-jgross@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-03 10:29:10 +02:00
Peter Xu
7005f7f81c kvm-irqchip: only commit route when irqchip is used
Reported from Alexey Kardashevskiy:

3f1fea0fb5 "kvm-irqchip: do explicit commit when update irq" produces
a crash on pseries guest running with VFIO on POWER8 machine as it does
not support KVM_CAP_IRQCHIP (KVM_CAP_IRQ_XICS is there instead). At the
result, KVMState::irq_routes is NULL when VFIO calls
kvm_irqchip_commit_routes.

This makes the routing update conditional.

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-03 13:25:44 +10:00
Bharata B Rao
c8721d3599 spapr: Error out when CPU hotplug is attempted on older pseries machines
CPU hotplug and coldplug aren't supported prior to pseries-2.7.  Further,
earlier machine types don't use CPU core objects at all.  These mean that
query-hotpluggable-cpus and coldplug on older pseries machines will crash
QEMU.  It also means that hotpluggable_cpus flag in query-machines will
be incorrectly set to true for pseries < 2.7, since it is based on the
presence of the query_hotpluggable_cpus hook.

- Don't assign the query_hotpluggable_cpus hook for pseries < 2.7
- query_hotpluggable_cpus should therefore never be called on pseries <
  2.7, so add an assert
- spapr_core_pre_plug() should fail hot/cold plug attempts for pseries <
  2.7, since core objects are never used there
- spapr_core_plug() should therefore never be called for pseries < 2.7, so
  add an assert.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
[dwg: Change from query_hotpluggable_cpus returning NULL for pseries < 2.7
 to not being called at all, reword commit message for accuracy]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-08-03 13:08:54 +10:00
Peter Maydell
8b54a6a6c6 Merge remote-tracking branch 'remotes/ehabkost/tags/numa-pull-request' into staging
MAINTAINERS: Add Host Memory Backends section

# gpg: Signature made Tue 02 Aug 2016 12:24:56 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/numa-pull-request:
  MAINTAINERS: Add Host Memory Backends section

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-02 12:55:12 +01:00
Evgeny Yakovlev
72aa364b1d ehci: faster frame index calculation for skipped frames
ehci_update_frindex takes time linearly proportional to a number
of uframes to calculate new frame index and raise FLR interrupts,
which is a problem for large amounts of uframes.

If we experience large delays between echi timer callbacks (i.e. because
other periodic handlers have taken a lot of time to complete) we
get a lot of skipped frames which then delay ehci timer callback more
and this leads to deadlocking the system when ehci schedules next
callback to be too soon.

Observable behaviour is qemu consuming 100% host CPU time while guest
is unresponsive. This misbehavior could happen for a while and QEMU does
not get out from this state automatically without the patch.

This change makes ehci_update_frindex execute in constant time.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Message-id: 1469638520-32706-1-git-send-email-den@openvz.org
CC: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-02 13:35:24 +02:00
Stefan Weil
c16e366464 wxx: Fix compilation of host-libusb.c
libusb.h uses the WINAPI calling convention for all function callbacks.

Cross compilation with Mingw-w64 on Cygwin fails when this calling
convention is missing.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Message-id: 1469775331-7468-1-git-send-email-sw@weilnetz.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-02 13:33:47 +02:00
Stefan Weil
3bf2b3a172 wxx: Fix compiler warning for host-libusb.c
The local variable i is unsed for Windows.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Message-id: 1469775569-7869-1-git-send-email-sw@weilnetz.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-08-02 13:33:47 +02:00
Eduardo Habkost
4fc264f49c MAINTAINERS: Add Host Memory Backends section
The hostmem code is closely related to the NUMA code, so I am
willing to handle patches to those files and share the work with
Igor (the original author of that code).

Acked-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-08-02 08:22:02 -03:00
Emilio G. Cota
f9dbc19e8b qdist: fix memory leak during binning
In qdist_bin__internal(), to->entries is initialized to a 1-element array,
which we then leak when n == from->n. Fix it.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469459025-23606-2-git-send-email-cota@braap.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Dave Hansen
ba03584f4f target-i386: fix typo in xsetbv implementation
QEMU 2.6 added support for the XSAVE family of instructions, which
includes the XSETBV instruction which allows setting the XCR0
register.

But, when booting Linux kernels with XSAVE support enabled, I was
getting very early crashes where the instruction pointer was set
to 0x3.  I tracked it down to a jump instruction generated by this:

        gen_jmp_im(s->pc - pc_start);

where s->pc is pointing to the instruction after XSETBV and pc_start
is pointing _at_ XSETBV.  Subtract the two and you get 0x3.  Whoops.

The fix is to replace this typo with the pattern found everywhere
else in the file when folks want to end the translation buffer.

Richard Henderson confirmed that this is a bug and that this is the
correct fix.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: qemu-stable@nongnu.org
Cc: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Emilio G. Cota
7266ae91a1 qht: do not segfault when gathering stats from an uninitialized qht
So far, QHT functions assume that the passed qht has previously been
initialized--otherwise they segfault.

This patch makes an exception for qht_statistics_init, with the goal
of simplifying calling code. For instance, qht_statistics_init is
called from the 'info jit' dump, and given that under KVM the TB qht
is never initialized, we get a segfault. Thus, instead of complicating
the 'info jit' code with additional checks, let's allow passing an
uninitialized qht to qht_statistics_init.

While at it, add a test for this to test-qht.

Before the patch (for $ qemu -enable-kvm [...]):
(qemu) info jit
[...]
direct jump count   0 (0%) (2 jumps=0 0%)
Program received signal SIGSEGV, Segmentation fault.

After the patch the "TB hash buckets", "TB hash occupancy"
and "TB hash avg chain" lines are omitted.
(qemu) info jit
[...]
direct jump count   0 (0%) (2 jumps=0 0%)
TB hash buckets     0/0 (-nan% head buckets used)
TB hash occupancy   nan% avg chain occ. Histogram: (null)
TB hash avg chain   nan buckets. Histogram: (null)
[...]

Reported by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1469205390-14369-1-git-send-email-cota@braap.org>
[Extract printing statistics to an entirely separate function. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Cao jin
767db021bc util: Drop inet_listen()
Since commit e65c67e4, inet_listen() is not used anymore, and all
inet listen operation goes through QIOChannel.

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Eric Blake <eblake@redhat.com>

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469451771-1173-3-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Cao jin
f8ea7a8656 util: drop unix_nonblocking_connect()
It is never used; all nonblocking connect now goes through
socket_connect(), which calls unix_connect_addr().

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469097213-26441-3-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Cao jin
00432b6953 util: drop inet_nonblocking_connect()
It is never used; all nonblocking connect now goes through
socket_connect(), which calls inet_connect_addr().

Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1469097213-26441-2-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Paolo Bonzini
3f822cff44 checkpatch: add check for bzero
Tested-By: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Igor Mammedov
056b68af77 fix qemu exit on memory hotplug when allocation fails at prealloc time
When adding hostmem backend at runtime, QEMU might exit with error:
  "os_mem_prealloc: Insufficient free host memory pages available to allocate guest RAM"

It happens due to os_mem_prealloc() not handling errors gracefully.

Fix it by passing errp argument so that os_mem_prealloc() could
report error to callers and undo performed allocation when
os_mem_prealloc() fails.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <1469008443-72059-1-git-send-email-imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Greg Kurz
0b21757124 numa: set the memory backend "is_mapped" field
Commit 2aece63 "hostmem: detect host backend memory is being used properly"
added a way to know if a memory backend is busy or available for use. It
caused a slight regression if we pass the same backend to a NUMA node and
to a pc-dimm device:

-m 1G,slots=2,maxmem=2G \
-object memory-backend-ram,size=1G,id=mem-mem1 \
-device pc-dimm,id=dimm-mem1,memdev=mem-mem1 \
-numa node,nodeid=0,memdev=mem-mem1

Before commit 2aece63, this would cause QEMU to print an error message and
to exit gracefully:

qemu-system-ppc64: -device pc-dimm,id=dimm-mem1,memdev=mem-mem1:
    can't use already busy memdev: mem-mem1

Since commit 2aece63, QEMU hits an assertion in the memory code:

qemu-system-ppc64: memory.c:1934: memory_region_add_subregion_common:
    Assertion `!subregion->container' failed.
Aborted

This happens because pc-dimm devices don't use memory_region_is_mapped()
anymore and cannot guess the backend is already used by a NUMA node.

Let's revert to the previous behavior by turning the NUMA code to also
call host_memory_backend_set_mapped() when it uses a backend.

Fixes: 2aece63c8a
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <146891691503.15642.9817215371777203794.stgit@bahia.lan>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Paolo Bonzini
34506b30e4 util/qht: Document memory ordering assumptions
It is naturally expected that some memory ordering should be provided
around qht_insert() and qht_lookup(). Document these assumptions in the
header file and put some comments in the source to denote how that
memory ordering requirements are fulfilled.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Sergey Fedorov: commit title and message provided;
comment on qht_remove() elided]
Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Message-Id: <20160715175852.30749-2-sergey.fedorov@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-08-02 12:03:58 +02:00
Alistair Francis
cc0100f464 MAINTAINERS: Update the Xilinx maintainers
Update the Xilinx maintainers documentation to simplify what we maintain
and cover all of our upstream code.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-01 15:31:32 +01:00
Sean Bruno
ded554cdb4 Fix bsd-user build errors after 8642c1b81e
LINK  sparc-bsd-user/qemu-sparc
bsd-user/main.o: In function `cpu_loop':
/home/sbruno/bsd/qemu/bsd-user/main.c:515: undefined reference to `cpu_sparc_exec'
c++: error: linker command failed with exit code 1 (use -v to see invocation)
gmake[1]: *** [Makefile:197: qemu-sparc] Error 1
gmake: *** [Makefile:204: subdir-sparc-bsd-user] Error 2

  LINK  i386-bsd-user/qemu-i386
bsd-user/main.o: In function `cpu_loop':
/home/sbruno/bsd/qemu/bsd-user/main.c:174: undefined reference to `cpu_x86_exec'
c++: error: linker command failed with exit code 1 (use -v to see invocation)
gmake[1]: *** [Makefile:197: qemu-i386] Error 1
gmake: *** [Makefile:204: subdir-i386-bsd-user] Error 2

Signed-off-by:  Sean Bruno <sbruno@freebsd.org>
Message-id: 20160729160235.64525-1-sbruno@freebsd.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-01 14:30:31 +01:00
Peter Maydell
69d490079f Update version for v2.7.0-rc1 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 15:55:59 +01:00
Aaron Lindsay
71fcd8eb68 avx2 configure: Disable if static build
This avoids a segfault like the following for at least some 4.8 versions
of gcc when configured with --static if avx2 instructions are also
enabled:

	Program received signal SIGSEGV, Segmentation fault.
	buffer_find_nonzero_offset_ifunc () at ./util/cutils.c:333
	333     {
	(gdb) bt
	#0  buffer_find_nonzero_offset_ifunc () at ./util/cutils.c:333
	#1  0x0000000000939c58 in __libc_start_main ()
	#2  0x0000000000419337 in _start ()

Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 15:21:43 +01:00
Sean Bruno
cf4b61d581 Unbreak FreeBSD build after optionrom update.
Update the build flags appropriately for FreeBSD and add the
correct LD_EMULATION type for the FreeBSD build case.

Fixes FreeBSD build error:
	ld: unrecognised emulation mode: elf_i386
	Supported emulations: elf_x86_64_fbsd elf_i386_fbsd
	gmake[1]: *** [Makefile:51: linuxboot_dma.img] Error 1
	gmake: *** [Makefile:229: romsubdir-optionrom] Error 2

Signed-off-by: Sean Bruno <sbruno@freebsd.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 14:25:35 +01:00
Paolo Bonzini
036999e93e optionrom: fix detection of -Wa,-32
The cc-option macro runs $(CC) in -S mode (generate assembly) to avoid a
pointless run of the assembler.  However, this does not work when you want
to detect support for cc->as option passthrough.  clang ignores -Wa unless
-c is provided, and exits successfully even if the -Wa,-32 option is not
supported.

Reported-by: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1469043409-14033-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 13:56:52 +01:00
Peter Maydell
c7e9aafe5c Merge remote-tracking branch 'remotes/lalrae/tags/mips-20160729' into staging
MIPS patches 2016-07-29

Changes:
* bug fixes

# gpg: Signature made Fri 29 Jul 2016 09:44:13 BST
# gpg:                using RSA key 0x52118E3C0B29DA6B
# gpg: Good signature from "Leon Alrae <leon.alrae@imgtec.com>"
# Primary key fingerprint: 8DD3 2F98 5495 9D66 35D4  4FC0 5211 8E3C 0B29 DA6B

* remotes/lalrae/tags/mips-20160729:
  target-mips: fix EntryHi.EHINV being cleared on TLB exception
  hw/mips_malta: Fix YAMON API print routine

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 13:05:55 +01:00
Peter Maydell
df2c35902e Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160729' into staging
ppc patch queue 2016-07-29

Here are the current pending ppc and spapr related patches for
qemu-2.7.  Given the freeze status, these are all bugfixes, with two
exceptions:

  * There's some final rework of the vcpu hotplug model.  Specifically
    we add spapr specific code on the generic basis Igor established
    to make cpu_index stable for pseries-2.7 and later machine types.
      - This allows us to remove the limitation that cpu cores had to
        be inserted in linear order, and removed in LIFO order.
      - This is worth merging this late in 2.7 because it will avoid
        considerable future grief with management layers needing to
        discover whether out-of-order hotplug is possible, amongst
        other things.
      - For now we do add a constraint that the initial cpu cannot be
        unplugged.
  * We add two extra testcases to make check, for postcopy and
    drive_del on ppc64.
      - Not strictly bugfixes, but safe, because they don't affect the
        actual code, and increase test coverage.

# gpg: Signature made Fri 29 Jul 2016 05:50:02 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160729:
  tests: add drive_del-test to ppc/ppc64
  spapr: Prevent boot CPU core removal
  ppc: Fix fault PC reporting for lve*/stve* VMX instructions
  test: port postcopy test to ppc64
  Revert "spapr: Ensure CPU cores are added contiguously and removed in LIFO order"
  spapr: init CPUState->cpu_index with index relative to core-id

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 12:37:08 +01:00
Peter Maydell
cbe81c6331 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: cleanups, fixes

a bunch of bugfixes and a couple of cleanups
making these easier and/or making debugging easier

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 29 Jul 2016 04:11:01 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (41 commits)
  mptsas: Fix a migration compatible issue
  vhost: do not update last avail idx on get_vring_base() failure
  vhost: add vhost_net_set_backend()
  vhost-user: add error report in vhost_user_write()
  tests: fix vhost-user-test leak
  tests: plug some leaks in virtio-net-test
  vhost-user: wait until backend init is completed
  char: add and use tcp_chr_wait_connected
  char: add chr_wait_connected callback
  vhost: add assert() to check runtime behaviour
  vhost-net: vhost_migration_done is vhost-user specific
  Revert "vhost-net: do not crash if backend is not present"
  vhost-user: add get_vhost_net() assertions
  vhost-user: keep vhost_net after a disconnection
  vhost-user: check vhost_user_{read,write}() return value
  vhost-user: check qemu_chr_fe_set_msgfds() return value
  vhost-user: call set_msgfds unconditionally
  qemu-char: fix qemu_chr_fe_set_msgfds() crash when disconnected
  vhost: use error_report() instead of fprintf(stderr,...)
  vhost: add missing VHOST_OPS_DEBUG
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 11:57:01 +01:00
Peter Maydell
aa2aac51f0 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Thu 28 Jul 2016 23:50:37 BST
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  ide: fix halted IO segfault at reset

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-29 11:01:38 +01:00
Laurent Vivier
059ce0f00a tests: add drive_del-test to ppc/ppc64
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 14:14:15 +10:00
Cao jin
f077f88912 mptsas: Fix a migration compatible issue
My previous commit 2e2aa316 removed internal flag msi_in_use, which
exists in vmstate, use VMSTATE_UNUSED for migration compatibility.

Reported-by: Amit Shah <amit.shah@redhat.com>
Suggested-by: Amit Shah <amit.shah@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
2016-07-29 06:09:55 +03:00
Marc-André Lureau
499c557975 vhost: do not update last avail idx on get_vring_base() failure
The state.num value will probably be 0 in this case, but that
doesn't make sense to update.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 05:47:17 +03:00
Bharata B Rao
62be8b044a spapr: Prevent boot CPU core removal
Boot CPU is assumed to be always present in QEMU code. So
until that assumptions are gone, deny removal request.
In another words, QEMU won't support boot CPU core hot-unplug.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
[dwg: Tweaked error message for clarity]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
Benjamin Herrenschmidt
bcd510b141 ppc: Fix fault PC reporting for lve*/stve* VMX instructions
We forgot to do gen_update_nip() for these like we do with other
helpers. Fix this, but in a more efficient way by passing the RA
to the accessors instead so the overhead is only taken on faults.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
lvivier@redhat.com
aaf89c8a49 test: port postcopy test to ppc64
As userfaultfd syscall is available on powerpc, migration
postcopy can be used.

This patch adds the support needed to test this on powerpc,
instead of using a bootsector to run code to modify memory,
we use a FORTH script in "boot-command" property.

As spapr machine doesn't support "-prom-env" argument
(the nvram is initialized by SLOF and not by QEMU),
"boot-command" is provided to SLOF via a file mapped nvram
(with "-drive file=...,if=pflash")

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
David Gibson
7cdd76132a Revert "spapr: Ensure CPU cores are added contiguously and removed in LIFO order"
This reverts commit 5cbc64de25.

Now that we have stable cpu_index values for pseries-2.7 (and future)
machine types, we can now safely allow hotplug and unplug in any order.

Conflicts:
	hw/ppc/spapr_cpu_core.c

Some conflicts on revert due to some small changes in the inserted
code since the original commit.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
Igor Mammedov
b63578bdb5 spapr: init CPUState->cpu_index with index relative to core-id
It will enshure that cpu_index for a given cpu stays the same
regardless of the order cpus has been created/deleted and so
it would be possible to migrate QEMU instance with out of order
created CPU.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-29 12:02:31 +10:00
John Snow
87ac25fd1f ide: fix halted IO segfault at reset
If one attempts to perform a system_reset after a failed IO request
that causes the VM to enter a paused state, QEMU will segfault trying
to free up the pending IO requests.

These requests have already been completed and freed, though, so all
we need to do is NULL them before we enter the paused state.

Existing AHCI tests verify that halted requests are still resumed
successfully after a STOP event.

Analyzed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 1469635201-11918-2-git-send-email-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2016-07-28 17:34:19 -04:00
Marc-André Lureau
950d94ba06 vhost: add vhost_net_set_backend()
Not all vhost-user backends support ops->vhost_net_set_backend(). It is
a nicer to provide an assert/error than to crash trying to
call. Furthermore, it improves a bit the code by hiding vhost_ops
details.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
f6b8571041 vhost-user: add error report in vhost_user_write()
Similar to vhost_user_read() error report, it is useful to have early
error report.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
69179fe2fc tests: fix vhost-user-test leak
Spotted by valgrind.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
1ec3b71cde tests: plug some leaks in virtio-net-test
Found thanks to valgrind.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
c89804d674 vhost-user: wait until backend init is completed
The chardev waits for an initial connection before starting qemu, and
vhost-user should wait for the backend negotiation to be completed
before starting qemu too.

vhost-user is started in the net_vhost_user_event callback, which is
synchronously called after the socket is connected. Use a
VhostUserState.started flag to indicate vhost-user init completed
successfully and qemu can be started.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:49 +03:00
Marc-André Lureau
d7a04fd7d5 char: add and use tcp_chr_wait_connected
Add a chr_wait_connected for the tcp backend, and use it in the
open_socket() function.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
6b6723c3b5 char: add chr_wait_connected callback
A function to wait on the backend to be connected, to be used in the
following patches.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
8695de0fcf vhost: add assert() to check runtime behaviour
All these functions must be called only after the backend is connected.
They are called from virtio-net.c, after either virtio or link status
change.

The check for nc->peer->link_down should ensure vhost_net_{start,stop}()
are always called between vhost_user_{start,stop}().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
51f7aca973 vhost-net: vhost_migration_done is vhost-user specific
Either the callback is mandatory to implement, in which case an assert()
is more appropriate, or it's not and we can't tell much whether the
function should fail or not (given it's name, I guess it should silently
success by default). Instead, make the implementation mandatory and
vhost-user specific to be more clear about its usage.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
bb12e761e8 Revert "vhost-net: do not crash if backend is not present"
Now that get_vhost_net() returns non-null after a successful
vhost_net_init(), we no longer need to check this case.

This reverts commit ecd34898596c60f79886061618dd7e01001113ad.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
1a5b68cee8 vhost-user: add get_vhost_net() assertions
Add a few assertions to be more explicit about the runtime behaviour
after the previous patch: get_vhost_net() is non-null after
net_vhost_user_init().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
e6bcb1b617 vhost-user: keep vhost_net after a disconnection
Many code paths assume get_vhost_net() returns non-null.

Keep VhostUserState.vhost_net after a successful vhost_net_init(),
instead of freeing it in vhost_net_cleanup().

VhostUserState.vhost_net is thus freed before after being recreated or
on final vhost_user_cleanup() and there is no need to save the acked
features.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
c4843a45e3 vhost-user: check vhost_user_{read,write}() return value
The vhost-user code is quite inconsistent with error handling. Instead
of ignoring some return values of read/write and silently going on with
invalid state (invalid read for example), break the code flow when the
error happened.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
6fab2f3f60 vhost-user: check qemu_chr_fe_set_msgfds() return value
Check qemu_chr_fe_set_msgfds() for errors, to make sure the message to
be sent is correct.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:48 +03:00
Marc-André Lureau
df3485a148 vhost-user: call set_msgfds unconditionally
It is fine to call set_msgfds() with 0 fd, and ensures any previous fd
array is cleared.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
5c7eaabf65 qemu-char: fix qemu_chr_fe_set_msgfds() crash when disconnected
Calling qemu_chr_fe_set_msgfds() on unconnected socket leads to crash
since s->ioc is NULL in this case. Return an error earlier instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
4afba63120 vhost: use error_report() instead of fprintf(stderr,...)
Let's use qemu proper error reporting API, this ensures the error is
reported at the right place (stderr or monitor), with a conventional
format.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
c640969216 vhost: add missing VHOST_OPS_DEBUG
Add missing VHOST_OPS_DEBUG() logs, for completeness.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
162bba7fa8 vhost: do not assert() on vhost_ops failure
Calling a vhost operation may fail, for example with disconnected
vhost-user backend, but qemu shouldn't abort in this case.

Log an error instead, except on error and cleanup code paths where it
can be mostly ignored.

Let's use a VHOST_OPS_DEBUG macro to easily disable those messages once
disconnected backend stabilizes.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
a06db3ec72 vhost: fix calling vhost_dev_cleanup() after vhost_dev_init()
vhost_net_init() calls vhost_dev_init() and in case of failure, calls
vhost_dev_cleanup() directly. However, the structure is already
partially cleaned on error. Calling vhost_dev_cleanup() again will call
vhost_virtqueue_cleanup() on already clean queues, and causing potential
double-close. Instead, adjust dev->nvqs and simplify vhost_dev_init()
code to not call vhost_virtqueue_cleanup() but vhost_dev_cleanup()
instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
f1a0365b68 vhost-net: always call vhost_dev_cleanup() on failure
vhost_dev_init(), calling vhost backend initialization, should be
cleaned up after failure too. Call vhost_dev_cleanup() in all failure
cases. First, it needs to zero-alloc the struct to avoid the initial
garbage.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
e0547b59dc vhost: make vhost_dev_cleanup() idempotent
It is called on multiple code path, so make it safe to call several
times (note: I don't remember a reproducer here, but a function called
'cleanup' should probably be idempotent in my book)

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:47 +03:00
Marc-André Lureau
5be5f9be72 vhost: fix cleanup on not fully initialized device
If vhost_dev_init() failed, caller may still call vhost_dev_cleanup()
later. However, vhost_dev_cleanup() tries to remove the device from the
list even if it wasn't yet added, which may lead to crashes. Similarly
for the memory listener.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
7b527247f0 vhost: assert the log was cleaned up
Make sure the log was released on cleanup, or it will leak (the
alternative is to call vhost_log_put() unconditionally, but it may hide
some dev state issues).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
9e0bc24fa5 vhost: make vhost_log_put() idempotent
Although not strictly required, it is nice to have vhost_log_put()
safely callable multiple times.

Clear dev->log* when calling vhost_log_put() to make the function
idempotent. This also simplifies a bit the caller work.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
7cb8a9b9f2 vhost: don't assume opaque is a fd, use backend cleanup
vhost-dev opaque isn't necessarily an fd, it can be a chardev when using
vhost-user. Goto fail, so vhost_backend_cleanup() is called to handle
backend cleanup appropriately.

vhost_set_backend_type() should never fail, use an assert().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
9c7d18b3a5 vhost-user: disconnect on HUP
In some cases, qemu_chr_fe_read_all() on HUP event doesn't raise
CHR_EVENT_CLOSED because the read/recv function returns -1 on
disconnected peers (for example with tch_chr_recv, an ECONNRESET errno
overwritten as EIO).

It is simpler to explicitely disconnect on HUP, rising CHR_EVENT_CLOSED
if it wasn't disconnected already.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
d9d261142d vhost-user: minor simplification
Shorten the code and make it more clear by using the specialized
function g_str_has_prefix().

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Marc-André Lureau
01edc230d9 misc: indentation
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:33:46 +03:00
Prasad J Pandit
1e7aed7014 virtio: check vring descriptor buffer length
virtio back end uses set of buffers to facilitate I/O operations.
An infinite loop unfolds in virtqueue_pop() if a buffer was
of zero size. Add check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-29 00:07:10 +03:00
Marcel Apfelbaum
9a4c0e220d hw/virtio-pci: fix virtio behaviour
Enable transitional virtio devices by default.
Enable virtio-1.0 for devices plugged into
PCIe ports (Root ports or Downstream ports).

Using the virtio-1 mode will remove the limitation
of the number of devices that can be attached to a machine
by removing the need for the IO BAR.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-29 00:07:10 +03:00
Wei Jiangang
be0d9760d7 apb: convert init to realize
Convert a device model where initialization obviously can't fail,
make it implement realize() rather than init().

Signed-off-by: Wei Jiangang <weijg.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-07-29 00:07:09 +03:00
Wei Jiangang
86395eb31f hw/pci-bridge: Convert pxb initialization functions to Error
Firstly, convert pxb_dev_init_common() to Error and rename
it to pxb_dev_realize_common().
Actually, pxb_register_bus() is converted as well.

And then,
convert pxb_dev_initfn() and pxb_pcie_dev_initfn() to Error,
rename them to pxb_dev_realize() and pxb_pcie_dev_realize()
respectively.

Signed-off-by: Wei Jiangang <weijg.fnst@cn.fujitsu.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
16de88a416 hw/apci: handle 64-bit MMIO regions correctly
In build_crs(), the calculation and merging of the ranges already happens
in 64-bit, but the entry boundaries are silently truncated to 32-bit in the
call to aml_dword_memory(). Fix it by handling the 64-bit MMIO ranges separately.
This fixes 64-bit BARs behind PXBs.

Reported-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
2df5a7b52f acpi: refactor pxb crs computation
Instead of always passing both IO and MEM ranges when
computing CRS ranges, define a new CrsRangeSet structure
that include them both.

This is done before introducing a third type of range,
64-bit MEM, so it will be easier to pass them all around.

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
c99cb18eeb hw/acpi: fix a DSDT table issue when a pxb is present.
PXBs do not support hotplug so they don't have a PCNT function.
Since the PXB's PCI root-bus is a child bus of bus 0, the
build_dsdt code will add a call to the corresponding PCNT function.

Fix this by skipping the PCNT call for the above case.
While at it skip also PCIe child buses.

Reported-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
7b346c742c hw/pxb: declare pxb devices as not hot-pluggable
Prevent future issues when hotplug will work for devices
attached to pxbs.

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Marcel Apfelbaum
2c533c5479 hw/pcie-root-port: Fix PCIe root port initialization
Specify the root port interrupt pin as part of the init
process for cases when msi/msix are not enabled.

Fixes "hw/pci/pci.c:196:23: runtime error: shift exponent -1 is negative"
warning from clang's sanitizer.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:09 +03:00
Michael S. Tsirkin
6b4495401b pcie: fix link active status bit migration
We changed link status register in pci express endpoint capability
over time. Specifically,

commit b2101eae63 ("pcie: Set the "link
active" in the link status register") set data link layer link active
bit in this register without adding compatibility to old machine types.

When migrating from qemu 2.3 and older this affects xhci devices which
under machine type 2.0 and older have a pci express endpoint capability
even if they are on a pci bus.

Add compatibility flags to make this bit value match what it was under
2.3.

Additionally, to avoid breaking migration from qemu 2.3 and up,
suppress checking link status during migration: this seems sane
since hardware can change link status at any time.

https://bugzilla.redhat.com/show_bug.cgi?id=1352860

Reported-by: Gerd Hoffmann <kraxel@redhat.com>
Fixes: b2101eae63
    ("pcie: Set the "link active" in the link status register")
Cc: qemu-stable@nongnu.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-29 00:07:08 +03:00
Leon Alrae
701074a6fc target-mips: fix EntryHi.EHINV being cleared on TLB exception
While implementing TLB invalidation feature we forgot to modify
part of code responsible for updating EntryHi during TLB exception.
Consequently EntryHi.EHINV is unexpectedly cleared on the exception.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-28 11:24:02 +01:00
Paul Burton
7f81dbb9a0 hw/mips_malta: Fix YAMON API print routine
The print routine provided as part of the in-built bootloader had a bug
in that it attempted to use a jump instruction as part of a loop, but
the target has its upper bits zeroed leading to control flow
transferring to 0xb0000814 rather than the intended 0xbfc00814. Fix this
by using a branch instruction instead, which seems more fit for purpose.

A simple way to test this is to build a Linux kernel with EVA enabled &
attempt to boot it in QEMU. It will attempt to print a message
indicating the configuration mismatch but QEMU would previously
incorrectly jump & wind up printing a continuous stream of the letter E.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-28 11:24:00 +01:00
Peter Maydell
21a21b853a Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 and machine queue, 2016-07-27

Highlights:
* Fixes to allow CPU hotplug/unplug in any order;
* Exit QEMU on invalid global properties.

# gpg: Signature made Wed 27 Jul 2016 15:28:53 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  vl: exit if a bad property value is passed to -global
  qdev: ignore GlobalProperty.errp for hotplugged devices
  machine: Add comment to abort path in machine_set_kernel_irqchip
  Revert "pc: Enforce adding CPUs contiguously and removing them in opposite order"
  pc: Init CPUState->cpu_index with index in possible_cpus[]
  qdev: Fix object reference leak in case device.realize() fails
  exec: Set cpu_index only if it's not been explictly set
  exec: Don't use cpu_index to detect if cpu_exec_init()'s been called
  exec: Reduce CONFIG_USER_ONLY ifdeffenery

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-27 18:18:21 +01:00
Peter Maydell
51313fe4f4 Merge remote-tracking branch 'remotes/stefanha/tags/CVE-2016-5403-virtio-unbounded-allocation-pull-request' into staging
# gpg: Signature made Wed 27 Jul 2016 16:13:02 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/CVE-2016-5403-virtio-unbounded-allocation-pull-request:
  virtio: error out if guest exceeds virtqueue size

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-27 17:26:07 +01:00
Peter Maydell
df5c50a208 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 26 Jul 2016 21:51:38 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  mirror: double performance of the bulk stage if the disc is full
  block/gluster: fix doc in the qapi schema and member name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-27 16:31:01 +01:00
Greg Kurz
03f28efbbb vl: exit if a bad property value is passed to -global
When passing '-global driver=host-powerpc64-cpu,property=compat,value=foo'
on the command line, without this patch, we get the following warning per
device (which means many lines if the guests has many cpus):

qemu-system-ppc64: Warning: can't apply global host-powerpc64-cpu.compat=foo:
    Invalid compatibility mode "foo"

... and QEMU continues execution, ignoring the property.

With this patch, we get a single line:

qemu-system-ppc64: can't apply global host-powerpc64-cpu.compat=foo:
    Invalid compatibility mode "foo"

... and QEMU exits.

The previous behavior is kept for hotplugged devices since we don't want
QEMU to exit when doing device_add.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-27 11:25:06 -03:00
Greg Kurz
b3443f43f4 qdev: ignore GlobalProperty.errp for hotplugged devices
This patch ensures QEMU won't terminate while hotplugging a device if the
global property cannot be set and errp points to error_fatal or error_abort.

While here, it also fixes indentation of the typename argument.

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-27 11:25:06 -03:00
Greg Kurz
78a3930685 machine: Add comment to abort path in machine_set_kernel_irqchip
We're not supposed to abort when the user passes a bogus value.
Since the checking is done in visit_type_OnOffSplit(), the call
to abort() is legitimate. Let's add a comment to make it
explicit.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-27 11:25:06 -03:00
Stefan Hajnoczi
afd9096eb1 virtio: error out if guest exceeds virtqueue size
A broken or malicious guest can submit more requests than the virtqueue
size permits, causing unbounded memory allocation in QEMU.

The guest can submit requests without bothering to wait for completion
and is therefore not bound by virtqueue size.  This requires reusing
vring descriptors in more than one request, which is not allowed by the
VIRTIO 1.0 specification.

In "3.2.1 Supplying Buffers to The Device", the VIRTIO 1.0 specification
says:

  1. The driver places the buffer into free descriptor(s) in the
     descriptor table, chaining as necessary

and

  Note that the above code does not take precautions against the
  available ring buffer wrapping around: this is not possible since the
  ring buffer is the same size as the descriptor table, so step (1) will
  prevent such a condition.

This implies that placing more buffers into the virtqueue than the
descriptor table size is not allowed.

QEMU is missing the check to prevent this case.  Processing a request
allocates a VirtQueueElement leading to unbounded memory allocation
controlled by the guest.

Exit with an error if the guest provides more requests than the
virtqueue size permits.  This bounds memory allocation and makes the
buggy guest visible to the user.

This patch fixes CVE-2016-5403 and was reported by Zhenhao Hong from 360
Marvel Team, China.

Reported-by: Zhenhao Hong <hongzhenhao@360.cn>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-27 14:04:40 +01:00
Vladimir Sementsov-Ogievskiy
0965a41e99 mirror: double performance of the bulk stage if the disc is full
Mirror can do up to 16 in-flight requests, but actually on full copy
(the whole source disk is non-zero) in-flight is always 1. This happens
as the request is not limited in size: the data occupies maximum available
capacity of s->buf.

The patch limits the size of the request to some artificial constant
(1 Mb here), which is not that big or small. This effectively enables
back parallelism in mirror code as it was designed.

The result is important: the time to migrate 10 Gb disk is reduced from
~350 sec to 170 sec.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468516741-82174-1-git-send-email-vsementsov@virtuozzo.com
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-26 16:23:36 -04:00
Prasanna Kumar Kalever
0a189ffb5e block/gluster: fix doc in the qapi schema and member name
1. qapi @BlockdevOptionsGluster schema member name s/debug_level/debug-level/
2. rearrange the versioning
3. s/server description/servers description/

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-Id: <1469198048-8535-1-git-send-email-prasanna.kalever@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-26 16:23:36 -04:00
Igor Mammedov
9527e7bde5 Revert "pc: Enforce adding CPUs contiguously and removing them in opposite order"
This reverts commit 4da7faaeb0.

Since commit:
  pc: init CPUState->cpu_index with index in possible_cpus[]
cpu_index is stable regardless of the order cpus were created
and QEMU instance stays migratable always so limitation added
by 4da7faaeb could be safely removed.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:13 -03:00
Igor Mammedov
a15d2728a9 pc: Init CPUState->cpu_index with index in possible_cpus[]
It will enshure that cpu_index for a given cpu stays the same
regardless of the order cpus has been created/deleted.

No compat code is needed as for initial cpus index in
possible_cpus[] matches cpu_index that's been auto-allocated
in cpu_exec_init().

Tha same applies for hotplug with cpu-add command if cpus are
added sequentially in increasing order as 'id' matches cpu_index.

If cpu-add had been used for creating out-of-order cpus,
that created unmigratable instance since it were not possible
to start target with the same cpu_index using old way
of migrating instance with hotplugged cpus:

* source QEMU with CLI (-smp 1,maxcpus=3 and cpu-add id=2)
  following set of cpu_index is allocated [0, 1] with
  apics set [0, 2] respectivelly
* target QEMU is started with CLI -smp 2,maxcpus=3
  resulting in set of cpu_index [0, 1] but with
  set of apics [0, 1] wich doesn't match source.

So we don't need compat code in this case as it's never worked
and newelly added device_add support would use stable cpu_index
set by machine to begin with, so it won't have above limitation
and source QEMU could be migrated to destination regardless
of the order cpus were created.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:08 -03:00
Igor Mammedov
69382d8b3e qdev: Fix object reference leak in case device.realize() fails
If device doesn't have parent assined before its realize
is called, device_set_realized() will implicitly set parent
to '/machine/unattached'.

However device_set_realized() may fail after that point at
several other points leaving not realized object dangling
in '/machine/unattached' and as result caller of

  obj = object_new()
    obj->ref == 1
  object_property_set_bool(obj,..., true, "realized",...)
    obj->ref == 2
  if (fail)
      object_unref(obj);
      obj->ref == 1

will get object leak instead of expected object destruction.

Fix it by making device_set_realized() to cleanup after itself
in case of failure.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:04 -03:00
Igor Mammedov
a07f953ef4 exec: Set cpu_index only if it's not been explictly set
It keeps the legacy behavior for all users that doesn't care
about stable cpu_index value, but would allow boards that
would support device_add/device_del to set stable cpu_index
that won't depend on order in which cpus are created/destroyed.

While at that simplify cpu_get_free_index() as cpu_index
generated by USER_ONLY and softmmu variants is the same
since none of the users support cpu-remove so far, except
of not yet released spapr/x86 device_add/delr, which
will be altered by follow up patches to set stable
cpu_index manually.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:01 -03:00
Igor Mammedov
8b1b835035 exec: Don't use cpu_index to detect if cpu_exec_init()'s been called
Instead use QTAIL's tqe_prev field to detect if cpu's been
placed in list by cpu_exec_init() which is always set if
QTAIL element is in list.

Fixes SIGSEGV on failure path in case cpu_index is assigned
by board and cpu.relalize() fails before cpu_exec_init() is called.

In follow up patches, cpu_index will be assigned by boards that
support cpu hot(un)plug and need stable cpu_index that doesn't
depend on order cpus are created/removed.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:32:00 -03:00
Igor Mammedov
1bc7e522d9 exec: Reduce CONFIG_USER_ONLY ifdeffenery
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-26 15:31:58 -03:00
Peter Maydell
c1fdfe9fca Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2016-07-26' into staging
Block patches for 2.7.0-rc1

# gpg: Signature made Tue 26 Jul 2016 18:11:36 BST
# gpg:                using RSA key 0x3BB14202E838ACAD
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40
#      Subkey fingerprint: 58B3 81CE 2DC8 9CF9 9730  EE64 3BB1 4202 E838 ACAD

* remotes/maxreitz/tags/pull-block-2016-07-26:
  iotest: fix python based IO tests
  block: export LUKS specific data to qemu-img info
  crypto: add support for querying parameters for block encryption
  AioContext: correct comments
  qcow2: do not allocate extra memory

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-26 18:22:49 +01:00
Daniel P. Berrange
4c44b4a4c8 iotest: fix python based IO tests
The previous commit refactoring iotests.py:

  commit 6661397446
  Author: Daniel P. Berrange <berrange@redhat.com>
  Date:   Wed Jul 20 14:23:10 2016 +0100

    scripts: refactor the VM class in iotests for reuse

was not properly tested and included a number of broken
bits.

 - The 'event_match' method was not moved into qemu.py
 - The 'self._args' list parameter in QEMUMachine needs
   to be copied otherwise modifications will affect the
   global 'qemu_opts' variable in iotests.py
 - The QEMUQtestMachine class methods had inverted
   parameter order for the super() calls
 - The QEMUQtestMachine class forgot to add
   '-machine accel=qtest'
 - The QEMUQtestMachine class constructor needs to set
   a default 'name' value before using it as it may
   be None
 - The QEMUQtestMachine class constructor needs to use
   named parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The 'qemu_prog' variable should be a string not a
   list in iotests.py
 - The VM classs constructor needs to use named
   parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The path to the socket-scm-helper needs to be
   passed into the QEMUMachine class

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1469549767-27249-1-git-send-email-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 18:28:40 +02:00
Daniel P. Berrange
c7c4cf498f block: export LUKS specific data to qemu-img info
The qemu-img info command has the ability to expose format
specific metadata about volumes. Wire up this facility for
the LUKS driver to report on cipher configuration and key
slot usage.

    $ qemu-img info ~/VirtualMachines/demo.luks
    image: /home/berrange/VirtualMachines/demo.luks
    file format: luks
    virtual size: 98M (102760448 bytes)
    disk size: 100M
    encrypted: yes
    Format specific information:
        ivgen alg: plain64
        hash alg: sha1
        cipher alg: aes-128
        uuid: 6ddee74b-3a22-408c-8909-6789d4fa2594
        cipher mode: xts
        slots:
            [0]:
                active: true
                iters: 572706
                key offset: 4096
                stripes: 4000
            [1]:
                active: false
                key offset: 135168
            [2]:
                active: false
                key offset: 266240
            [3]:
                active: false
                key offset: 397312
            [4]:
                active: false
                key offset: 528384
            [5]:
                active: false
                key offset: 659456
            [6]:
                active: false
                key offset: 790528
            [7]:
                active: false
                key offset: 921600
        payload offset: 2097152
        master key iters: 142375

One somewhat undesirable artifact is that the data fields are
printed out in (apparently) random order. This will be addressed
later by changing the way the block layer pretty-prints the
image specific data.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1469192015-16487-3-git-send-email-berrange@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Daniel P. Berrange
40c8502822 crypto: add support for querying parameters for block encryption
When creating new block encryption volumes, we accept a list of
parameters to control the formatting process. It is useful to
be able to query what those parameters were for existing block
devices. Add a qcrypto_block_get_info() method which returns a
QCryptoBlockInfo instance to report this data.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1469192015-16487-2-git-send-email-berrange@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Cao jin
54a16a63d0 AioContext: correct comments
Correct comments of field notify_me

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-id: 1468575858-22975-1-git-send-email-caoj.fnst@cn.fujitsu.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Vladimir Sementsov-Ogievskiy
ebf7bba090 qcow2: do not allocate extra memory
There are no needs to allocate more than one cluster, as we set
avail_out for deflate to one cluster.

Zlib docs (http://www.zlib.net/manual.html) says:
"deflate compresses as much data as possible, and stops when the input
buffer becomes empty or the output buffer becomes full."

So, deflate will not write more than avail_out to output buffer. If
there is not enough space in output buffer for compressed data (it may
be larger than input data) deflate just returns Z_OK. (if all data is
compressed and written to output buffer deflate returns Z_STREAM_END).

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 1468515565-81313-1-git-send-email-vsementsov@virtuozzo.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-26 17:46:37 +02:00
Peter Maydell
f49ee630d7 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160726' into staging
ppc patch queue 2016-07-26

Here's the current batch of ppc and spapr related patches intended for
qemu-2.7.  Given the late stage in 2.7 development, these are all
bugfixes with one exception:

The "spapr: disintricate core-id from DT semantics" changes the way
ids are assigned in the new core-based hotplug infrastructure.  This
isn't strictly a bugfix, but we've determined that the current way of
assigning core-ids will cause considerable grief with future plans for
cpu hotplug.  Therefore it's better to fix this now, late in 2.7,
before we have a released version with the problematic numbering.

# gpg: Signature made Tue 26 Jul 2016 04:04:57 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160726:
  spapr: disintricate core-id from DT semantics
  target-ppc: add PPC_MFTB flag to e500mc and e5500
  spapr: fix spapr-nvram migration
  hw/ppc/spapr: Make sure to close the htab_fd when migration is canceled
  ppc: Huge page detection mechanism fixes - Episode III

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-26 11:53:47 +01:00
Peter Maydell
a467bb9940 Merge remote-tracking branch 'remotes/mdroth/tags/qga-pull-2016-07-25-tag' into staging
qemu-ga patch queue for 2.7

* fix w32 build failures due to -Werror when building with VSS/fsfreeze
  enabled
* fix leaking for qemu-ga config files in `make check`

# gpg: Signature made Mon 25 Jul 2016 20:01:09 BST
# gpg:                using RSA key 0x3353C9CEF108B584
# gpg: Good signature from "Michael Roth <flukshun@gmail.com>"
# gpg:                 aka "Michael Roth <mdroth@utexas.edu>"
# gpg:                 aka "Michael Roth <mdroth@linux.vnet.ibm.com>"
# Primary key fingerprint: CEAC C9E1 5534 EBAB B82D  3FA0 3353 C9CE F108 B584

* remotes/mdroth/tags/qga-pull-2016-07-25-tag:
  configure: mark qemu-ga VSS includes as system headers
  tests: use static qga config file
  build-sys: link tests/data

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-26 10:53:23 +01:00
Michael Roth
690604f696 configure: mark qemu-ga VSS includes as system headers
As of e4650c81, we do w32 builds with -Werror enabled. Unfortunately
for cases where we enable VSS support in qemu-ga, we still have
warnings generated by VSS includes that ship as part of the Microsoft
VSS SDK.

We can selectively address a number of these warnings using

  #pragma GCC diagnostic ignored ...

but at least one of these:

  warning: ‘typedef’ was ignored in this declaration

resulting from declarations of the form:

  typedef struct Blah { ... };

does not provide a specific command-line/pragma option to disable
warnings of the sort.

To allow VSS builds to succeed, the next-best option is disabling
these warnings on a per-file basis. pragmas like #pragma GCC
system_header can be used to declare subsequent includes/declarations
as being exempt from normal warnings, but this must be done within
a header file.

Since we don't control the VSS SDK, we'd need to rely on a
intermediate header include to accomplish this, and
since different objects in the VSS link target rely on different
headers from the VSS SDK, this would become somewhat of a rat's nest
(though not totally unmanageable).

The next step up in granularity is just marking the entire VSS
SDK include path as system headers via -isystem. This is a bit more
heavy-handed, but since this SDK hasn't changed since 2005, there's
likely little to be gained from selectively disabling warnings
anyway, so we implement that approach here.

This fixes the -Werror failures in both the configure test and the
qga build due to shared reliance on $vss_win32_include. For the
same reason, this also enforces a new dependency on -isystem support
in the C/C++ compiler when building QGA with VSS enabled.

Cc: Thomas Huth <thuth@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2016-07-25 13:23:18 -05:00
Marc-André Lureau
1741b945f2 tests: use static qga config file
Do not create a leaking temporary file, but use a static file instead.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2016-07-25 13:23:18 -05:00
Marc-André Lureau
fe31017f79 build-sys: link tests/data
Link a common tests data directory to the build directory.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2016-07-25 13:23:17 -05:00
Greg Kurz
12bf2d33fe spapr: disintricate core-id from DT semantics
The goal of this patch is to have a stable core-id which does not depend
on any DT related semantics, which involve non-obvious computations on
modern PowerPC server cpus.

With this patch, the DT core id is computed on-demand as:

       (core-id / smp_threads) * smt

where smt is the number of threads per core in the host.

This formula should be consolidated in a helper since it is needed in
several places.

Other uses for core-id includes: compute a stable cpu_index (which
allows random order hotplug/unplug without breaking migration) and
NUMA.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 15:43:41 +10:00
Michael Walle
2fff4bad40 target-ppc: add PPC_MFTB flag to e500mc and e5500
According to the e500mc and e5500 core reference manual they have support
for the mftb instruction.

Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 11:18:49 +10:00
lvivier@redhat.com
cf472f48d5 spapr: fix spapr-nvram migration
When spapr-nvram is backed by a file using pflash interface,
migration fails on the destination guest with assert:

    bdrv_co_pwritev: Assertion `!(bs->open_flags & 0x0800)' failed.

This avoids the problem by delaying the pflash update until after
the device loads complete.

This fix is similar to the one for the pflash_cfi01 migration:

    90c647d Fix pflash migration

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 10:19:30 +10:00
Thomas Huth
c573fc03da hw/ppc/spapr: Make sure to close the htab_fd when migration is canceled
When canceling a migration process, we currently do not close the
HTAB migration file descriptor since htab_save_complete() is never
called in that case. So we leave the migration process with a
dangling htab_fd value around, and this causes any further migration
attempts to fail. To fix this issue, simply make sure that the
htab_fd is closed during the migration cleanup stage. And since the
cleanup() function is also called when migration succeeds, we can
also remove the call to close_htab_fd() from the htab_save_complete()
function.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1354341
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 10:19:30 +10:00
Thomas Huth
3d4f253483 ppc: Huge page detection mechanism fixes - Episode III
After already fixing two issues with the huge page detection mechanism
(see commit 159d2e39a8 and 86b50f2e1b), Greg Kurz noticed another
case that caused the guest to crash where QEMU announces huge pages
though they should not be available for the guest:

qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
 -m 1G,slots=4,maxmem=32G
 -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
 -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
 -numa node,nodeid=0 -numa node,nodeid=1

That means if there is a global mem-path option, we still have
to look at the memory-backend objects that have been specified
additionally and return their minimum page size if that value
is smaller than the page size of the main memory.

Reported-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-25 10:19:30 +10:00
Peter Maydell
2d2e632ad0 Update version for v2.7.0-rc0 release
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-22 15:32:42 +01:00
Peter Maydell
01a720125f target-sh4: Use glib allocator in movcal helper
Coverity spots that helper_movcal() calls malloc() but doesn't
check for failure. Fix this by switching to the glib allocation
functions, which abort on allocation failure.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1468327859-21385-1-git-send-email-peter.maydell@linaro.org
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
2016-07-22 11:33:24 +01:00
Peter Maydell
e3643d32ee Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.7-6' into staging
Migration:
- Fix a postcopy bug
- Add a testsuite for measuring migration performance

# gpg: Signature made Fri 22 Jul 2016 08:56:44 BST
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit-migration/tags/migration-for-2.7-6:
  tests: introduce a framework for testing migration performance
  scripts: ensure monitor socket has SO_REUSEADDR set
  scripts: set timeout when waiting for qemu monitor connection
  scripts: refactor the VM class in iotests for reuse
  scripts: add a 'debug' parameter to QEMUMonitorProtocol
  scripts: add __init__.py file to scripts/qmp/
  migration: set state to post-migrate on failure

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-22 10:51:32 +01:00
Daniel P. Berrange
409437e16d tests: introduce a framework for testing migration performance
This introduces a moderately general purpose framework for
testing performance of migration.

The initial guest workload is provided by the included 'stress'
program, which is configured to spawn one thread per guest CPU
and run a maximally memory intensive workload. It will loop
over GB of memory, xor'ing each byte with data from a 4k array
of random bytes. This ensures heavy read and write load across
all of guest memory to stress the migration performance. While
running the 'stress' program will record how long it takes to
xor each GB of memory and print this data for later reporting.

The test engine will spawn a pair of QEMU processes, either on
the same host, or with the target on a remote host via ssh,
using the host kernel and a custom initrd built with 'stress'
as the /init binary. Kernel command line args are set to ensure
a fast kernel boot time (< 1 second) between launching QEMU and
the stress program starting execution.

None the less, the test engine will initially wait N seconds for
the guest workload to stablize, before starting the migration
operation. When migration is running, the engine will use pause,
post-copy, autoconverge, xbzrle compression and multithread
compression features, as well as downtime & bandwidth tuning
to encourage completion. If migration completes, the test engine
will wait N seconds again for the guest workooad to stablize on
the target host. If migration does not complete after a preset
number of iterations, it will be aborted.

While the QEMU process is running on the source host, the test
engine will sample the host CPU usage of QEMU as a whole, and
each vCPU thread. While migration is running, it will record
all the stats reported by 'query-migration'. Finally, it will
capture the output of the stress program running in the guest.

All the data produced from a single test execution is recorded
in a structured JSON file. A separate program is then able to
create interactive charts using the "plotly" python + javascript
libraries, showing the characteristics of the migration.

The data output provides visualization of the effect on guest
vCPU workloads from the migration process, the corresponding
vCPU utilization on the host, and the overall CPU hit from
QEMU on the host. This is correlated from statistics from the
migration process, such as downtime, vCPU throttling and iteration
number.

While the tests can be run individually with arbitrary parameters,
there is also a facility for producing batch reports for a number
of pre-defined scenarios / comparisons, in order to be able to
get standardized results across different hardware configurations
(eg TCP vs RDMA, or comparing different VCPU counts / memory
sizes, etc).

To use this, first you must build the initrd image

 $ make tests/migration/initrd-stress.img

To run a a one-shot test with all default parameters

 $ ./tests/migration/guestperf.py > result.json

This has many command line args for varying its behaviour.
For example, to increase the RAM size and CPU count and
bind it to specific host NUMA nodes

 $ ./tests/migration/guestperf.py \
       --mem 4 --cpus 2 \
       --src-mem-bind 0 --src-cpu-bind 0,1 \
       --dst-mem-bind 1 --dst-cpu-bind 2,3 \
       > result.json

Using mem + cpu binding is strongly recommended on NUMA
machines, otherwise the guest performance results will
vary wildly between runs of the test due to lucky/unlucky
NUMA placement, making sensible data analysis impossible.

To make it run across separate hosts:

 $ ./tests/migration/guestperf.py \
       --dst-host somehostname > result.json

To request that post-copy is enabled, with switchover
after 5 iterations

 $ ./tests/migration/guestperf.py \
       --post-copy --post-copy-iters 5 > result.json

Once a result.json file is created, a graph of the data
can be generated, showing guest workload performance per
thread and the migration iteration points:

 $ ./tests/migration/guestperf-plot.py --output result.html \
        --migration-iters --split-guest-cpu result.json

To further include host vCPU utilization and overall QEMU
utilization

 $ ./tests/migration/guestperf-plot.py --output result.html \
        --migration-iters --split-guest-cpu \
	--qemu-cpu --vcpu-cpu result.json

NB, the 'guestperf-plot.py' command requires that you have
the plotly python library installed. eg you must do

 $ pip install --user  plotly

Viewing the result.html file requires that you have the
plotly.min.js file in the same directory as the HTML
output. This js file is installed as part of the plotly
python library, so can be found in

  $HOME/.local/lib/python2.7/site-packages/plotly/offline/plotly.min.js

The guestperf-plot.py program can accept multiple json files
to plot, enabling results from different configurations to
be compared.

Finally, to run the entire standardized set of comparisons

  $ ./tests/migration/guestperf-batch.py \
       --dst-host somehost \
       --mem 4 --cpus 2 \
       --src-mem-bind 0 --src-cpu-bind 0,1 \
       --dst-mem-bind 1 --dst-cpu-bind 2,3
       --output tcp-somehost-4gb-2cpu

will store JSON files from all scenarios in the directory
named tcp-somehost-4gb-2cpu

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-7-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:39 +05:30
Daniel P. Berrange
168ae6c24b scripts: ensure monitor socket has SO_REUSEADDR set
If tests use a TCP based monitor socket, the connection will
go into a TIMED_WAIT state when the test exits. This will
randomly prevent the test from being re-run without a certain
time period. Set the SO_REUSEADDR flag on the socket to ensure
we can immediately re-run the tests

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-6-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:35 +05:30
Daniel P. Berrange
238064621f scripts: set timeout when waiting for qemu monitor connection
If QEMU fails to launch for some reason, the QEMUMonitorProtocol
class accept() method will wait forever in a socket accept call.
Set a timeout of 15 seconds so that we fail more gracefully
instead of hanging the test script forever

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-5-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:28 +05:30
Daniel P. Berrange
6661397446 scripts: refactor the VM class in iotests for reuse
The iotests module has a python class for controlling QEMU
processes. Pull the generic functionality out of this file
and create a scripts/qemu.py module containing a QEMUMachine
class. Put the QTest integration support into a subclass
QEMUQtestMachine.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-4-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:24 +05:30
Daniel P. Berrange
991e7c4650 scripts: add a 'debug' parameter to QEMUMonitorProtocol
Add a 'debug' parameter to the QEMUMonitorProtocol class
which will cause it to print out all JSON strings on
sys.stderr

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-3-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:17 +05:30
Daniel P. Berrange
6f7a4a81ce scripts: add __init__.py file to scripts/qmp/
When searching for modules to load, python will ignore any
sub-directory which does not contain __init__.py. This means
that both scripts and scripts/qmp/ have to be explicitly added
to the python path. By adding a __init__.py file to scripts/qmp,
we only need add scripts/ to the python path and can then simply
do 'from qmp import qmp' to load scripts/qmp/qmp.py.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-Id: <1469020993-29426-2-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:13 +05:30
Dr. David Alan Gilbert
42da5550d6 migration: set state to post-migrate on failure
If a migration fails/is cancelled during the postcopy stage we currently
end up with the runstate as finish-migrate, where it should be post-migrate.
There's a small window in precopy where I think the same thing can
happen, but I've never seen it.

It rarely matters; the only postcopy case is if you restart a migration, which
again is a case that rarely matters in postcopy because it's only
safe to restart the migration if you know the destination hasn't
been running (which you might if you started the destination with -S
and hadn't got around to 'c' ing it before the postcopy failed).
Even then it's a small window but potentially you could hit if
there's a problem loading the devices on the destination.

This corresponds to:
https://bugzilla.redhat.com/show_bug.cgi?id=1355683

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1468601086-32117-1-git-send-email-dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
2016-07-22 13:23:09 +05:30
Peter Maydell
206d0c2436 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: new features, cleanups, fixes

- interrupt remapping for intel iommus
- a bunch of virtio cleanups
- fixes all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 21 Jul 2016 18:49:30 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (57 commits)
  intel_iommu: avoid unnamed fields
  virtio: Update migration docs
  virtio-gpu: Wrap in vmstate
  virtio-gpu: Use migrate_add_blocker for virgl migration blocking
  virtio-input: Wrap in vmstate
  9pfs: Wrap in vmstate
  virtio-serial: Wrap in vmstate
  virtio-net: Wrap in vmstate
  virtio-balloon: Wrap in vmstate
  virtio-rng: Wrap in vmstate
  virtio-blk: Wrap in vmstate
  virtio-scsi: Wrap in vmstate
  virtio: Migration helper function and macro
  virtio-serial: Remove old migration version support
  virtio-net: Remove old migration version support
  virtio-scsi: Replace HandleOutput typedef
  Revert "mirror: Workaround for unexpected iohandler events during completion"
  virtio-scsi: Call virtio_add_queue_aio
  virtio-blk: Call virtio_add_queue_aio
  virtio: Introduce virtio_add_queue_aio
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-21 20:12:37 +01:00
Michael S. Tsirkin
bc38ee10fc intel_iommu: avoid unnamed fields
Also avoid unnamed fields for portability.
Also, rename VTD_IRTE to VTD_IR_TableEntry for coding
style compliance.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
1a210f631b virtio: Update migration docs
Remove references to register_savevm.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
0fc07498da virtio-gpu: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
de8892215e virtio-gpu: Use migrate_add_blocker for virgl migration blocking
virgl conditionally registers a vmstate as unmigratable when virgl
is enabled; instead use the migrate_add_blocker mechanism.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
428d2ed2c8 virtio-input: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
18e0e5b240 9pfs: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
42e6c0390b virtio-serial: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
290c242845 virtio-net: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
7f1ca9b23b virtio-balloon: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
b607579386 virtio-rng: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
bbded32c64 virtio-blk: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
5a289a2883 virtio-scsi: Wrap in vmstate
Forcibly convert it to a vmstate wrapper;  proper conversion
comes later.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:20 +03:00
Dr. David Alan Gilbert
5943124cc0 virtio: Migration helper function and macro
To make conversion of virtio devices to VMState simple
at first add a helper function for the simple virtio_save
case and a helper macro that defines the VMState structure.
These will probably go away or change as more of the virtio
code gets converted.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Dr. David Alan Gilbert
71945ae164 virtio-serial: Remove old migration version support
virtio-serial-bus has had version 3 since 37f95bf3d0 in 0.13-rc0;
it's time to clean it up a bit.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Dr. David Alan Gilbert
76010cb320 virtio-net: Remove old migration version support
virtio-net has had version 11 since 0ce0e8f4 in 2009
(v0.11.0-rc0-1480-g0ce0e8f) - remove the code to support loading
anything earlier.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
209b27bbe9 virtio-scsi: Replace HandleOutput typedef
There is a new common one in virtio.h, use it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
d4a92a8420 Revert "mirror: Workaround for unexpected iohandler events during completion"
This reverts commit ab27c3b5e7.

The virtio storage device host notifiers now work with
bdrv_drained_begin/end, so we don't need this hack any more.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
1c627137c1 virtio-scsi: Call virtio_add_queue_aio
AIO based handler is more appropriate here because it will then
cooperate with bdrv_drained_begin/end. It is needed by the coming
revert patch.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
0ff841f6d1 virtio-blk: Call virtio_add_queue_aio
AIO based handler is more appropriate here because it will then
cooperate with bdrv_drained_begin/end. It is needed by the coming
revert patch.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
872dd82c83 virtio: Introduce virtio_add_queue_aio
Using this function instead of virtio_add_queue marks the vq as aio
based. This differentiation will be useful in later patches.

Distinguish between virtqueue processing in the iohandler context and main loop
AioContext.  iohandler context is isolated from AioContexts and therefore does
not run during aio_poll().

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Fam Zheng
bf1780b0d5 virtio: Add typedef for handle_output
The function pointer signature has been repeated a few times, using a
typedef may make coding easier.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
4684a20410 intel_iommu: disallow kernel-irqchip=on with IR
When user specify "intremap=on" with "-M kernel-irqchip=on", throw error
and then quit.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
54a6c11b20 kvm-all: add trace events for kvm irqchip ops
These will help us monitoring irqchip route activities more easily.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Radim Krčmář
a3f409cb4a intel_iommu: support all masks in interrupt entry cache invalidation
Linux guests do not gracefully handle cases when the invalidation mask
they wanted is not supported, probably because real hardware always
allowed all.

We can just say that all 16 masks are supported, because both
ioapic_iec_notifier and kvm_update_msi_routes_all invalidate all caches.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
3f1fea0fb5 kvm-irqchip: do explicit commit when update irq
In the past, we are doing gsi route commit for each irqchip route
update. This is not efficient if we are updating lots of routes in the
same time. This patch removes the committing phase in
kvm_irqchip_update_msi_route(). Instead, we do explicit commit after all
routes updated.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
e1d4fb2de5 kvm-irqchip: x86: add msi route notify fn
One more IEC notifier is added to let msi routes know about the IEC
changes. When interrupt invalidation happens, all registered msi routes
will be updated for all PCI devices.

Since both vfio and vhost are possible gsi route consumers, this patch
will go one step further to keep them safe in split irqchip mode and
when irqfd is enabled.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
[move trace-events lines into target-i386/trace-events]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
38d87493f3 kvm-irqchip: i386: add hook for add/remove virq
Adding two hooks to be notified when adding/removing msi routes. There
are two kinds of MSI routes:

- in kvm_irqchip_add_irq_route(): before assigning IRQFD. Used by
  vhost, vfio, etc.

- in kvm_irqchip_send_msi(): when sending direct MSI message, if
  direct MSI not allowed, we will first create one MSI route entry
  in the kernel, then trigger it.

This patch only hooks the first one (irqfd case). We do not need to
take care for the 2nd one, since it's only used by QEMU userspace
(kvm-apic) and the messages will always do in-time translation when
triggered. While we need to note them down for the 1st one, so that we
can notify the kernel when cache invalidation happens.

Also, we do not hook IOAPIC msi routes (we have explicit notifier for
IOAPIC to keep its cache updated). We only need to care about irqfd
users.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:19 +03:00
Peter Xu
d1f6af6a17 kvm-irqchip: simplify kvm_irqchip_add_msi_route
Changing the original MSIMessage parameter in kvm_irqchip_add_msi_route
into the vector number. Vector index provides more information than the
MSIMessage, we can retrieve the MSIMessage using the vector easily. This
will avoid fetching MSIMessage every time before adding MSI routes.

Meanwhile, the vector info will be used in the coming patches to further
enable gsi route update notifications.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:18 +03:00
Peter Xu
ede9c94acf intel_iommu: add SID validation for IR
This patch enables SID validation. Invalid interrupts will be dropped.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:44:16 +03:00
Jan Kiszka
28589311b3 intel_iommu: Add support for Extended Interrupt Mode
As neither QEMU nor KVM support more than 255 CPUs so far, this is
simple: we only need to switch the destination ID translation in
vtd_remap_irq_get if EIME is set.

Once CFI support is there, it will have to take EIM into account as
well. So far, nothing to do for this.

This patch allows to use x2APIC in split irqchip mode of KVM.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
[use le32_to_cpu() to retrieve dest_id]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
e3d9c92507 ioapic: register IOMMU IEC notifier for ioapic
Let IOAPIC the first consumer of x86 IOMMU IEC invalidation
notifiers. This is only used for split irqchip case, when vIOMMU
receives IR invalidation requests, IOAPIC will be notified to update
kernel irq routes. For simplicity, we just update all IOAPIC routes,
even if the invalidated entries are not IOAPIC ones.

Since now we are creating IOMMUs using "-device" parameter, IOMMU
device will be created after IOAPIC.  We need to do the registration
after machine done by leveraging machine_done notifier.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
02a2cbc872 x86-iommu: introduce IEC notifiers
This patch introduces x86 IOMMU IEC (Interrupt Entry Cache)
invalidation notifier list. When vIOMMU receives IEC invalidate
request, all the registered units will be notified with specific
invalidation requests.

Intel IOMMU is the first provider that generates such a event.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
8b5ed7dffa intel_iommu: add support for split irqchip
In split irqchip mode, IOAPIC is working in user space, only update
kernel irq routes when entry changed. When IR is enabled, we directly
update the kernel with translated messages. It works just like a kernel
cache for the remapping entries.

Since KVM irqfd is using kernel gsi routes to deliver interrupts, as
long as we can support split irqchip, we will support irqfd as
well. Also, since kernel gsi routes will cache translated interrupts,
irqfd delivery will not suffer from any performance impact due to IR.

And, since we supported irqfd, vhost devices will be able to work
seamlessly with IR now. Logically this should contain both vhost-net and
vhost-user case.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[move trace-events lines into target-i386/trace-events]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
c15fa0bea9 ioapic: introduce ioapic_entry_parse() helper
Abstract IOAPIC entry parsing logic into a helper function.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Peter Xu
cb135f59b8 q35: ioapic: add support for emulated IOAPIC IR
This patch translates all IOAPIC interrupts into MSI ones. One pseudo
ioapic address space is added to transfer the MSI message. By default,
it will be system memory address space. When IR is enabled, it will be
IOMMU address space.

Currently, only emulated IOAPIC is supported.

Idea suggested by Jan Kiszka and Rita Sinha in the following patch:

https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg01933.html

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:49 +03:00
Michael S. Tsirkin
09cd058a2c intel_iommu: get rid of {0} initializers
Correct and portable in theory, but triggers warnings with older gcc
versions when -Wmissing-braces is enabled.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-21 20:43:43 +03:00
Peter Maydell
7239247a2b Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2016-07-21-1' into staging
Merge qcrypto-next 2016/07/21 v1

# gpg: Signature made Thu 21 Jul 2016 11:07:36 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2016-07-21-1:
  crypto: don't open-code qcrypto_hash_supports
  crypto: use glib as fallback for hash algorithm
  crypto: use /dev/[u]random as a final fallback random source

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-21 11:48:49 +01:00
Peter Maydell
61ead113ae Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

v2:
 * Resolved merge conflict with block/iscsi.c [Peter]

# gpg: Signature made Wed 20 Jul 2016 17:20:52 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request: (25 commits)
  raw_bsd: Convert to byte-based interface
  nbd: Convert to byte-based interface
  block: Kill .bdrv_co_discard()
  sheepdog: Switch .bdrv_co_discard() to byte-based
  raw_bsd: Switch .bdrv_co_discard() to byte-based
  qcow2: Switch .bdrv_co_discard() to byte-based
  nbd: Switch .bdrv_co_discard() to byte-based
  iscsi: Switch .bdrv_co_discard() to byte-based
  gluster: Switch .bdrv_co_discard() to byte-based
  blkreplay: Switch .bdrv_co_discard() to byte-based
  block: Add .bdrv_co_pdiscard() driver callback
  block: Convert .bdrv_aio_discard() to byte-based
  rbd: Switch rbd_start_aio() to byte-based
  raw-posix: Switch paio_submit() to byte-based
  block: Convert BB interface to byte-based discards
  block: Convert bdrv_aio_discard() to byte-based
  block: Switch BlockRequest to byte-based
  block: Convert bdrv_discard() to byte-based
  block: Convert bdrv_co_discard() to byte-based
  iscsi: Rely on block layer to break up large requests
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Conflicts:
	block/gluster.c
2016-07-21 11:00:36 +01:00
Pranith Kumar
b95aae121b Revert e5dfc5e8e("Move README to markdown")
checkpatch.pl and other scripts fail without README. Revert
the rename for now; we may add README.md as a symlink later.

This reverts commit e5dfc5e8e7.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
Message-id: 20160720203131.30229-2-bobby.prani@gmail.com
[PMM: tweaked commit message a little]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-21 10:46:41 +01:00
Daniel P. Berrange
7603289712 crypto: don't open-code qcrypto_hash_supports
Call the existing qcrypto_hash_supports method from
qcrypto_hash_bytesv instead of open-coding it again.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-21 10:46:27 +01:00
Daniel P. Berrange
2165477c0f crypto: use glib as fallback for hash algorithm
GLib >= 2.16 provides GChecksum API which is good enough
for md5, sha1, sha256 and sha512. Use this as a final
fallback if neither nettle or gcrypt are available. This
lets us remove the stub hash impl, and so callers can
be sure those 4 algs are always available at compile
time. They may still be disabled at runtime, so a check
for qcrypto_hash_supports() is still best practice to
report good error messages.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-21 10:46:27 +01:00
Daniel P. Berrange
f3c8355c7a crypto: use /dev/[u]random as a final fallback random source
If neither gcrypt or gnutls are available to provide a
cryptographic random number generator, fallback to consuming
bytes directly from /dev/[u]random.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-21 10:46:27 +01:00
Peter Maydell
e66b05e9ca Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 queue, 2016-07-20

# gpg: Signature made Wed 20 Jul 2016 16:07:38 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request: (28 commits)
  pc: Make device_del CPU work for x86 CPUs
  target-i386: Add x86_cpu_unrealizefn()
  apic: Use apic_id as apic's migration instance_id
  (kvm)apic: Add unrealize callbacks
  apic: kvm-apic: Fix crash due to access to freed memory region
  apic: Drop APICCommonState.idx and use APIC ID as index in local_apics[]
  apic: move MAX_APICS check to 'apic' class
  pc: Implement query-hotpluggable-cpus callback
  pc: cpu: Allow device_add to be used with x86 cpu
  pc: Enforce adding CPUs contiguously and removing them in opposite order
  pc: Forbid BSP removal
  pc: Register created initial and hotpluged CPUs in one place pc_cpu_plug()
  pc: Delay setting number of boot CPUs to machine_done time
  pc: Set APIC ID based on socket/core/thread ids if it's not been set yet
  target-i386: Fix apic object leak when CPU is deleted
  target-i386: cpu: Do not ignore error and fix apic parent
  target-i386: Add support for UMIP and RDPID CPUID bits
  target-i386: Add socket/core/thread properties to X86CPU
  target-i386: Replace custom apic-id setter/getter with static property
  pc: cpu: Consolidate apic-id validity checks in pc_cpu_pre_plug()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 21:32:56 +01:00
Peter Maydell
3b55fbdcb0 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160720' into staging
Fixes for s390x in the css area.

# gpg: Signature made Wed 20 Jul 2016 15:12:43 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160720:
  s390x/css: provide a dev_path for css devices
  s390x/css: sch_handle_start_func() handles resume, too
  s390x/css: copy CCW format bit from ORB to SCSW

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 20:59:05 +01:00
Peter Maydell
6a426eb27e Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160720-1' into staging
usb: xhci assert fix, add usbredir streams property

# gpg: Signature made Wed 20 Jul 2016 12:32:09 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20160720-1:
  usbredir: add streams property
  xhci: Fix possible side effect from assert()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 20:31:31 +01:00
Peter Maydell
518cb31fa7 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20160720-1' into staging
qxl: fix qxl_set_dirty call in qxl_dirty_one_surface

# gpg: Signature made Wed 20 Jul 2016 12:28:01 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vga-20160720-1:
  qxl: fix qxl_set_dirty call in qxl_dirty_one_surface

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 19:41:20 +01:00
Peter Maydell
46ca418d9f Merge remote-tracking branch 'remotes/famz/tags/docker-pull-request' into staging
# gpg: Signature made Wed 20 Jul 2016 12:19:56 BST
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/docker-pull-request:
  docker: pass EXECUTABLE to build script
  docker: Don't start a container that doesn't exist
  docker: Add "images" subcommand to docker.py
  docker: Fix exit code if $CMD failed
  docker: More sensible run script
  tests/docker/docker.py: add update operation
  tests/docker/dockerfiles: new debian-bootstrap.docker
  tests/docker/docker.py: check and run .pre script
  tests/docker/docker.py: support --include-executable
  tests/docker/docker.py: docker_dir outside build

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 18:52:10 +01:00
Peter Xu
651e4cefee intel_iommu: Add support for PCI MSI remap
This patch enables interrupt remapping for PCI devices.

To play the trick, one memory region "iommu_ir" is added as child region
of the original iommu memory region, covering range 0xfeeXXXXX (which is
the address range for APIC). All the writes to this range will be taken
as MSI, and translation is carried out only when IR is enabled.

Idea suggested by Paolo Bonzini.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:31:04 +03:00
Peter Xu
a4ca297e84 intel_iommu: add IR translation faults defines
Adding translation fault definitions for interrupt remapping. Please
refer to VT-d spec section 7.1.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1f91acee17 intel_iommu: define several structs for IOMMU IR
Several data structs are defined to better support the rest of the
patches: IRTE to parse remapping table entries, and IOAPIC/MSI related
structure bits to parse interrupt entries to be filled in by guest
kernel.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
80de52ba87 intel_iommu: handle interrupt remap enable
Handle writting to IRE bit in global command register.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
a58614391d intel_iommu: define interrupt remap table addr register
Defined Interrupt Remap Table Address register to store IR table
pointer. Also, do proper handling on global command register writes to
store table pointer and its size.

One more debug flag "DEBUG_IR" is added for interrupt remapping.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
cfc13df462 acpi: add DMAR scope definition for root IOAPIC
To enable interrupt remapping for intel IOMMU device, each IOAPIC device
in the system reported via ACPI MADT must be explicitly enumerated under
one specific remapping hardware unit. This patch adds the root-complex
IOAPIC into the default DMAR device.

Please refer to VT-d spec 8.3.1.1 for more information.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
d54bd7f80a intel_iommu: set IR bit for ECAP register
Enable IR in IOMMU Extended Capability register.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
b79104722f intel_iommu: allow queued invalidation for IR
Queued invalidation is required for IR. This patch add basic support for
interrupt cache invalidate requests. Since we currently have no IR cache
implemented yet, we can just skip all interrupt cache invalidation
requests for now.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
d46114f9ec acpi: enable INTR for DMAR report structure
In ACPI DMA remapping report structure, enable INTR flag when specified.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1121e0afdc x86-iommu: introduce "intremap" property
Adding one property for intel-iommu devices to specify whether we should
support interrupt remapping. By default, IR is disabled. To enable it,
we should use (take Intel IOMMU as example):

  -device intel_iommu,intremap=on

This property can be shared by Intel and future AMD IOMMUs.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1cf5fd573f x86-iommu: provide x86_iommu_get_default
Instead of searching the device tree every time, one static variable is
declared for the default system x86 IOMMU device.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
04af0e18bc intel_iommu: rename VTD_PCI_DEVFN_MAX to x86-iommu
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Peter Xu
1c7955c450 x86-iommu: introduce parent class
Introducing parent class for intel-iommu devices named "x86-iommu". This
is preparation work to abstract shared functionalities out from Intel
and AMD IOMMUs. Currently, only the parent class is introduced. It does
nothing yet.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:27 +03:00
Marcel Apfelbaum
b1af7959a6 hw/versatile: realize the PCI root bus as part of the versatile init
'Realize' the PCI root bus manually since the 'realize' mechanism
does not propagate to child devices yet.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:27 +03:00
Marcel Apfelbaum
685f9a3428 hw/prep: realize the PCI root bus as part of the prep init
'Realize' the PCI root bus manually since the 'realize' mechanism
does not propagate to child devices yet.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
3c3c1e3203 hw/grackle: fix PCI bus initialization
Delay the host-bridge 'realization' until the
PCI root bus is attached.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
2f3ae0b2d4 hw/apb: fix PCI bus initialization
Create and connect the PCI root bus to the
host bridge before the later is 'realized'.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
a8c1a75343 hw/mips: fix PCI bus initialization
Delay the host-bridge 'realization' until the
PCI root bus is attached.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Leon Alrae <leon.alrae@imgtec.com>
Tested-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
50d3bba9da hw/alpha: fix PCI bus initialization
Delay the host-bridge 'realization' until the
PCI root bus is attached.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Marcel Apfelbaum
eaf8d91cd7 tests/prom-env-test: increase the test timeout
On a slower machine the test can take more than 30 seconds.
Increase the timeout to 100 seconds.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-20 19:30:26 +03:00
Stefan Hajnoczi
cdcab9d941 nvdimm: fix memory leak in error code path
object_get_canonical_path_component() returns a heap-allocated string
that must be freed using g_free().

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-20 19:30:26 +03:00
Peter Maydell
e0ce97f896 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Wed 20 Jul 2016 01:18:39 BST
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  block/gluster: add support for multiple gluster servers
  block/gluster: using new qapi schema
  block/gluster: deprecate rdma support
  block/gluster: code cleanup
  block/gluster: rename [server, volname, image] -> [host, volume, path]
  mirror: fix request throttling in drive-mirror
  mirror: improve performance of mirroring of empty disk
  mirror: efficiently zero out target
  mirror: optimize dirty bitmap filling in mirror_run a bit
  block: remove extra condition in bdrv_can_write_zeroes_with_unmap
  mirror: create mirror_dirty_init helper for mirror_run
  mirror: create mirror_throttle helper
  mirror: make sectors_in_flight int64_t
  dirty-bitmap: operate with int64_t amount

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 17:05:35 +01:00
Igor Mammedov
8fe6374e8e pc: Make device_del CPU work for x86 CPUs
ACPI subsystem already has all logic in place the only
thing left to eject CPU is destroy it and ammend
present CPUs counter in CMOS, do so.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:20 -03:00
Igor Mammedov
c884776e9d target-i386: Add x86_cpu_unrealizefn()
First remove VCPU from exec loop and only then remove lapic.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:20 -03:00
Igor Mammedov
f6e984443f apic: Use apic_id as apic's migration instance_id
instance_id is generated by last_used_id + 1 for a given device type
so for QEMU with 3 CPUs instance_id for APICs is a seti of [0, 1, 2]
When CPU in the middle is hot-removed and migration started
APICs with instance_ids 0 and 2 are transferred in migration stream.
However target starts with 2 CPUs and APICs' instance_ids are
generated from scratch [0, 1] hence migration fails with error
  Unknown savevm section or instance 'apic' 2

Fix issue by manually registering APIC's vmsd with apic_id as
instance_id, in this case instance_id on target will always
match instance_id on source as apic_id is the same for a given
cpu instance.

Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
9c156f9de5 (kvm)apic: Add unrealize callbacks
Callbacks will do necessary cleanups before APIC device is deleted

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
365aa1131f apic: kvm-apic: Fix crash due to access to freed memory region
kvm-apic.io_memory memory region had its parent set to NULL at
memory_region_init_io() time, so it ended up as a child in
 /unattached contaner.
As result when kvm-apic instance was deleted, the child property
 /unattached/kvm-apic-msi[XXX] contained a reference to
kvm-apic.io_memory address which was freed as part of kvm-apic.

Do the same as 'apic' and make kvm-apic instance the owner
of the memory region so that it won't end up in /unattached
and gets cleanly released along with related kvm-apic instance.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
1dfe3282cf apic: Drop APICCommonState.idx and use APIC ID as index in local_apics[]
local_apics[] is sized to contain all APIC ID supported in xAPIC mode,
so use APIC ID as index in it instead of constantly increasing counter idx.

Fixes error "apic initialization failed" when a CPU hotplugged and
unplugged more times than there are free slots in local_apics[].

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
889211b18b apic: move MAX_APICS check to 'apic' class
MAX_APICS is only used by child 'apic' class and not
by its parent TYPE_APIC_COMMON or any other derived
class.

Move check into end user 'apic' class so it won't
get in the way of other APIC implementations
if they support more then MAX_APICS.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
4d952914a0 pc: Implement query-hotpluggable-cpus callback
it returns a list of present/possible to hotplug CPU
objects with a list of properties to use with
device_add.

in PC case returned list would looks like:
-> { "execute": "query-hotpluggable-cpus" }
<- {"return": [
     {
        "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
        "props": {"core-id": 0, "socket-id": 1, "thread-id": 0}
     },
     {
        "qom-path": "/machine/unattached/device[0]",
        "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
        "props": {"core-id": 0, "socket-id": 0, "thread-id": 0}
     }
   ]}

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
edd1211194 pc: cpu: Allow device_add to be used with x86 cpu
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
4da7faaeb0 pc: Enforce adding CPUs contiguously and removing them in opposite order
It will still allow us to use cpu_index as migration instance_id
since when CPUs are added contiguously (from the first to the last)
and removed in opposite order, cpu_index stays stable and it's
reproducible on destination side.

While there is work in progress to support migration when there
are holes in cpu_index range resulting from out-of-order plug or
unplug, this patch is intended as an interim solution until
cpu_index usage is cleaned up.

As result of this patch it would be possible to plug/unplug CPUs,
but in limited order that doesn't break migration.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
73360e2785 pc: Forbid BSP removal
Boot CPU is assumed to always present in QEMU code, so
untile that assumptions are gone, deny removal request,
In another words QEMU won't support BSP hot-unplug.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:19 -03:00
Igor Mammedov
a44a49dbf2 pc: Register created initial and hotpluged CPUs in one place pc_cpu_plug()
Consolidate possible_cpus array management in pc_cpu_plug() for
smp_cpus, coldplugged with -device and hotplugged with
device_add.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
ba157b696c pc: Delay setting number of boot CPUs to machine_done time
Currently present CPUs counter in CMOS only contains
smp_cpus (i.e. initial CPUs specified with -smp X) and
doesn't account for CPUs created with -device.
If VM is started with additional CPUs added with
 -device, it will hang in BIOS waiting for condition
   smp_cpus == counted_cpus
forever as counted_cpus will include -device CPUs as well
and be more than smp_cpus.

Make present CPUs counter in CMOS to count all CPUs
(initial and coldplugged with -device) by delaying
it to machine done time when it possible to count
CPUs added with -device.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
e8f7b83e88 pc: Set APIC ID based on socket/core/thread ids if it's not been set yet
CPU added with device_add help won't have APIC ID set,
so set it according to socket/core/thread ids provided
with device_add command.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
67e55caa6d target-i386: Fix apic object leak when CPU is deleted
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
6816b1b381 target-i386: cpu: Do not ignore error and fix apic parent
object_property_add_child() silently fails with error that it can't
create duplicate propery 'apic' as we already have 'apic' property
registered for 'apic' feature. As result generic device_realize puts
apic into unattached container.

As it's programming error, abort if name collision happens in future
and fix property name for apic_state to 'lapic', this way apic is
a child of cpu instance.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Paolo Bonzini
c2f193b538 target-i386: Add support for UMIP and RDPID CPUID bits
These are both stored in CPUID[EAX=7,EBX=0].ECX.  KVM is going to
be able to emulate both (albeit with a performance loss in the case
of RDPID, which therefore will be in KVM_GET_EMULATED_CPUID rather
than KVM_GET_SUPPORTED_CPUID).

It's also possible to implement both in TCG, but this is for 2.8.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
d89c2b8b98 target-i386: Add socket/core/thread properties to X86CPU
These properties will be used by as address where to plug
CPU with help -device/device_add commands.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
2da00e3176 target-i386: Replace custom apic-id setter/getter with static property
Custom apic-id setter/getter doesn't do any property specific
checks anymore, so clean it up and use more compact static
property DEFINE_PROP_UINT32 instead.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:18 -03:00
Igor Mammedov
4ec60c76d5 pc: cpu: Consolidate apic-id validity checks in pc_cpu_pre_plug()
Machine code knows about all possible APIC IDs so use that
instead of hack which does O(n^2) complexity duplicate
checks, interating over global CPUs list.
As result duplicate check is done only once with O(log n) complexity.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 12:02:17 -03:00
Igor Mammedov
7baef5cfea pc: Extract CPU lookup into a separate function
It will be reused in the next patch at pre_plug time

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Dr. David Alan Gilbert
11f6fee576 target-i386: Set physical address bits based on host
Add the host-phys-bits boolean property, if true, take phys-bits
from the hosts physical bits value, overriding either the default
or the user specified value.

We can also use the value we read from the host to check the users
explicitly set value and warn them if it doesn't match.

Note:
   a) We only read the hosts value in KVM mode (because on non-x86
      we get an abort if we try)
   b) We don't warn about trying to use host-phys-bits in TCG mode,
      we just fall back to the TCG default.  This allows the machine
      type to set the host-phys-bits flag if it wants and then to
      work in both TCG and KVM.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Igor Mammedov
9f3aab5853 pc: Add x86_topo_ids_from_apicid()
It's reverse of apicid_from_topo_ids() and will be used in follow up
patches to fill in data structures for query-hotpluggable-cpus and
for user friendly error reporting.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Igor Mammedov
d9c84f1969 target-i386: Use uint32_t for X86CPU.apic_id
Redo 9886e834 (target-i386: Require APIC ID to be explicitly set before
CPU realize) in another way that doesn't use int64_t to detect
if apic-id property has been set.

Use the fact that 0xFFFFFFFF is the broadcast
value that a CPU can't have and set default
uint32_t apic_id to it instead of using int64_t.

Later uint32_t apic_id will be used to drop custom
property setter/getter in favor of static property.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Dr. David Alan Gilbert
fcc35e7cca target-i386: Fill high bits of mtrr mask
Fill the bits between 51..number-of-physical-address-bits in the
MTRR_PHYSMASKn variable range mtrr masks so that they're consistent
in the migration stream irrespective of the physical address space
of the source VM in a migration.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:44 -03:00
Dr. David Alan Gilbert
112dad69d7 target-i386: Mask mtrr mask based on CPU physical address limits
The CPU GPs if we try and set a bit in a variable MTRR mask above
the limit of physical address bits on the host.  We hit this
when loading a migration from a host with a larger physical
address limit than our destination (e.g. a Xeon->i7 of same
generation) but previously used to get away with it
until 48e1a45 started checking that msr writes actually worked.

It seems in our case the GP probably comes from KVM emulating
that GP.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:43 -03:00
Dr. David Alan Gilbert
af45907a13 target-i386: Allow physical address bits to be set
Currently QEMU sets the x86 number of physical address bits to the
magic number 40.  This is only correct on some small AMD systems;
Intel systems tend to have 36, 39, 46 bits, and large AMD systems
tend to have 48.

Having the value different from your actual hardware is detectable
by the guest and in principal can cause problems;
The current limit of 40 stops TB VMs being created by those lucky
enough to have that much.

This patch lets you set the physical bits by a cpu property but
defaults to the same 40bits which matches TCGs setup.

I've removed the ancient warning about the 42 bit limit in exec.c;
I can't find that limit in there and no one else seems to know where
it is.

We use a magic value of 0 as the property default so that we can
later distinguish between the default and a user set value.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:43 -03:00
Dr. David Alan Gilbert
709787ee99 target-i386: Provide TCG_PHYS_ADDR_BITS
Provide a constant for the number of address bits supported under TCG.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-20 11:58:43 -03:00
Cornelia Huck
2a79eb1a61 s390x/css: provide a dev_path for css devices
We need to implement the get_dev_path method for the css bus, or
else we might end up with two different devices having the same
qdev_path.

This was noticed when adding two scsi_hd controllers: The SCSIBus
code will produce a non-unique dev_path for vmstate usage if the
parent bus does not provide the get_dev_path method.

We simply use the device's bus id, as this is unique and we won't
have any deeper hierarchy from a channel subsystem perspective
anyway.

Note that we need to disable this for older machine versions,
as this changes the migration format.

Reported-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Tested-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-20 15:47:25 +02:00
Sascha Silbe
727a0424dd s390x/css: sch_handle_start_func() handles resume, too
It's not obvious from the code flow that sch_handle_start_func() gets
called for rsch. Add some comments explaining this.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-20 15:47:25 +02:00
Sascha Silbe
485dd69088 s390x/css: copy CCW format bit from ORB to SCSW
The CCW Format (F) flag of the Subchannel-Status Word (SCSW) indicates
the format of the CCWs "associated with an I/O operation", i.e. the
value of CCW-Format Control (F) bit of the Operation-Request Block
(ORB).

Copy the CCW format bit from the ORB to the SCSW so we correctly
indicate the format of the CCWs to the guest.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-20 15:47:25 +02:00
Peter Maydell
3b2e6798ff Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-07-19' into staging
QAPI patches for 2016-07-19

# gpg: Signature made Tue 19 Jul 2016 19:35:27 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2016-07-19:
  net: Use correct type for bool flag
  qapi: Change Netdev into a flat union
  block: Simplify drive-mirror
  block: Simplify block_set_io_throttle
  qapi: Implement boxed types for commands/events
  qapi: Plumb in 'boxed' to qapi generator lower levels
  qapi-event: Simplify visit of non-implicit data
  qapi: Drop useless gen_err_check()
  qapi: Add type.is_empty() helper
  qapi: Hide tag_name data member of variants
  qapi: Special case c_name() for empty type
  qapi: Require all branches of flat union enum to be covered
  net: use Netdev instead of NetClientOptions in client init
  qapi: change QmpInputVisitor to QSLIST
  qapi: change QmpOutputVisitor to QSLIST

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 14:34:08 +01:00
Eric Blake
decaeed773 raw_bsd: Convert to byte-based interface
Since the raw format driver is just passing things through, we can
do byte-based read and write if the underlying protocol does
likewise.

There's one tricky part - if we probed the image format, we document
that we restrict operations on the initial sector.  It's easiest to
keep this guarantee by enforcing read-modify-write on sub-sector
operations (yes, this partially reverts commit ad82be2f).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468624988-423-20-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
70c4fb2648 nbd: Convert to byte-based interface
The NBD protocol doesn't have any notion of sectors, so it is
a fairly easy conversion to use byte-based read and write.

Signed-off-by: Eric Blake <eblake@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468624988-423-19-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
02aefe43cb block: Kill .bdrv_co_discard()
Now that all drivers have a byte-based .bdrv_co_pdiscard(), we
no longer need to worry about the sector-based version.  We can
also relax our minimum alignment to 1 for drivers that support it.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-18-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
dde4753763 sheepdog: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-17-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
5f61ad079a raw_bsd: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-16-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
82e8a7888b qcow2: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-15-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
447e57c3b0 nbd: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

While at it, call directly into nbd-client.c instead of having
a pointless trivial wrapper in nbd.c.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-14-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
97c7e85cfe iscsi: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Unlike write_zeroes, where we can be handed unaligned requests
and must fail gracefully with -ENOTSUP for a fallback, we are
guaranteed that discard requests are always aligned because the
block layer already ignored unaligned head/tail.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-13-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:24:25 +01:00
Eric Blake
1014170b82 gluster: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-12-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
aba76e2f03 blkreplay: Switch .bdrv_co_discard() to byte-based
Another step towards killing off sector-based block APIs.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-11-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
47a5486d59 block: Add .bdrv_co_pdiscard() driver callback
There's enough drivers with a sector-based callback that it will
be easier to switch one at a time.  This patch adds a byte-based
callback, and then after all drivers are swapped, we'll drop the
sector-based callback.

[checkpatch doesn't like the space after coroutine_fn in
block_int.h, but it's consistent with the rest of the file]

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-10-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
4da444a0bb block: Convert .bdrv_aio_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based driver callback .bdrv_aio_discard() with a new
byte-based .bdrv_aio_pdiscard().  Only raw-posix and RBD drivers
are affected, so it was not worth splitting into multiple patches.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-9-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
7bbca9e290 rbd: Switch rbd_start_aio() to byte-based
The internal function converts to byte-based before calling into
RBD code; hoist the conversion to the callers so that callers
can then be switched to byte-based themselves.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-8-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
36e3b2e733 raw-posix: Switch paio_submit() to byte-based
The only remaining uses of paio_submit() were flush (with no
offset or count) and discard (which we are switching to byte-based);
furthermore, the similarly named paio_submit_co() is already
byte-based.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-7-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
1c6c4bb7f0 block: Convert BB interface to byte-based discards
Change sector-based blk_discard(), blk_co_discard(), and
blk_aio_discard() to instead be byte-based blk_pdiscard(),
blk_co_pdiscard(), and blk_aio_pdiscard().  NBD gets a lot
simpler now that ignoring the unaligned portion of a
byte-based discard request is handled under the hood by
the block layer.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-6-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
60ebac16bc block: Convert bdrv_aio_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_aio_discard() with a new byte-based
bdrv_aio_pdiscard(), which silently ignores any unaligned head
or tail.  Driver callbacks will be converted in followup patches.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468624988-423-5-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
b15404e027 block: Switch BlockRequest to byte-based
BlockRequest is the internal struct used by bdrv_aio_*.  At the
moment, all such calls were sector-based, but we will eventually
convert to byte-based; start by changing the internal variables
to be byte-based.  No change to behavior, although the read and
write code can now go byte-based through more of the stack.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468624988-423-4-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
0c51a893b6 block: Convert bdrv_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_discard() with a new byte-based
bdrv_pdiscard(), which silently ignores any unaligned head
or tail.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-3-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:55 +01:00
Eric Blake
9f1963b3f7 block: Convert bdrv_co_discard() to byte-based
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_co_discard() with a new byte-based
bdrv_co_pdiscard(), which silently ignores any unaligned head
or tail.  Driver callbacks will be converted in followup patches.

By calculating the alignment outside of the loop, and clamping
the max discard to an aligned value, we can simplify the actions
done within the loop.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468624988-423-2-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
6bd01f14db iscsi: Rely on block layer to break up large requests
Now that the block layer honors max_request, we don't need to
bother with an EINVAL on overlarge requests, but can instead
assert that requests are well-behaved.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468607524-19021-7-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
1e2a77a851 nbd: Drop unused offset parameter
Now that NBD relies on the block layer to fragment things, we no
longer need to track an offset argument for which fragment of
a request we are actually servicing.

While at it, use true and false instead of 0 and 1 for a bool
parameter.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468607524-19021-6-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
fb1a6de14a nbd: Rely on block layer to break up large requests
Now that the block layer will honor max_transfer, we can simplify
our code to rely on that guarantee.

The readv code can call directly into nbd-client, just as the
writev code has done since commit 52a4650.

Interestingly enough, while qemu-io 'w 0 40m' splits into a 32M
and 8M transaction, 'w -z 0 40m' splits into two 16M and an 8M,
because the block layer caps the bounce buffer for writing zeroes
at 16M.  When we later introduce support for NBD_CMD_WRITE_ZEROES,
we can get a full 32M zero write (or larger, if the client and
server negotiate that write zeroes can use a larger size than
ordinary writes).

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468607524-19021-5-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
04ed95f484 block: Fragment writes to max transfer length
Drivers should be able to rely on the block layer honoring the
max transfer length, rather than needing to return -EINVAL
(iscsi) or manually fragment things (nbd).  We already fragment
write zeroes at the block layer; this patch adds the fragmentation
for normal writes, after requests have been aligned (fragmenting
before alignment would lead to multiple unaligned requests, rather
than just the head and tail).

When fragmenting a large request where FUA was requested, but
where we know that FUA is implemented by flushing all requests
rather than the given request, then we can still get by with
only one flush.  Note, however, that we need a followup patch
to the raw format driver to avoid a regression in the number of
flushes actually issued.

The return value was previously nebulous on success (sometimes
zero, sometimes the length written); since we never have a short
write, and since fragmenting may store yet another positive
value in 'ret', change the function to always return 0 on success,
matching what we do in bdrv_aligned_preadv().

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468607524-19021-4-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
8a39b4d6e2 raw_bsd: Don't advertise flags not supported by protocol layer
The raw format layer supports all flags via passthrough - but
it only makes sense to pass through flags that the lower layer
actually supports.

The next patch gives stronger reasoning for why this is correct.
At the moment, the raw format layer ignores the max_transfer
limit of its protocol layer, and an attempt to do the qemu-io
'w -f 0 40m' to an NBD server that lacks FUA will pass the entire
40m request to the NBD driver, which then fragments the request
itself into a 32m write, 8m write, and flush.  But once the block
layer starts honoring limits and fragmenting packets, the raw
driver will hand the NBD driver two separate requests; if both
requests have BDRV_REQ_FUA set, then this would result in a 32m
write, flush, 8m write, and second flush.  By having the raw
layer no longer advertise FUA support when the protocol layer
lacks it, we are back to a single flush at the block layer for
the overall 40m request.

Note that 'w -f -z 0 40m' does not currently exhibit the same
problem, because there, the fragmentation does not occur until
at the NBD layer (the raw layer has .bdrv_co_pwrite_zeroes, and
the NBD layer doesn't advertise max_pwrite_zeroes to constrain
things at the raw layer) - but the problem is latent and we
would again have too many flushes without this patch once the
NBD layer implements support for the new NBD_CMD_WRITE_ZEROES
command, if it sets max_pwrite_zeroes to the same 32m limit as
recommended by the NBD protocol.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468607524-19021-3-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Eric Blake
1a62d0accd block: Fragment reads to max transfer length
Drivers should be able to rely on the block layer honoring the
max transfer length, rather than needing to return -EINVAL
(iscsi) or manually fragment things (nbd).  This patch adds
the fragmentation in the block layer, after requests have been
aligned (fragmenting before alignment would lead to multiple
unaligned requests, rather than just the head and tail).

The return value was previously nebulous on success on whether
it was zero or the length read; and fragmenting may introduce
yet other non-zero values if we use the last length read.  But
as at least some callers are sloppy and expect only zero on
success, it is easiest to just guarantee 0.

[Fix uninitialized ret local variable in bdrv_aligned_preadv().
--Stefan]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-id: 1468607524-19021-2-git-send-email-eblake@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-20 14:11:54 +01:00
Peter Maydell
338404d061 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160719' into staging
target-arm queue:
 * fix two minor Coverity complaints

# gpg: Signature made Tue 19 Jul 2016 18:02:34 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160719:
  arm_gicv3: Add assert()s to tell Coverity that offsets are aligned
  target-arm: Fix unreachable code in gicv3_class_name()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 12:48:18 +01:00
Gerd Hoffmann
87ae924b73 usbredir: add streams property
Enabled by default, can be used to turn off (usb3) streams support.
xhci has a such a property too (same name, same default).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1468408474-17648-1-git-send-email-kraxel@redhat.com
2016-07-20 13:31:20 +02:00
Alexey Kardashevskiy
f81bb347ef xhci: Fix possible side effect from assert()
A static analysis tool called BEAM detected possible side effect from
assert() calling a helper which may change an XHCI ring after every call.

This moves xhci_ring_fetch() out of assert() so it will be called
with and without enabled debug.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-id: 1468812548-31868-1-git-send-email-aik@ozlabs.ru
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-20 13:31:09 +02:00
Alex Bennée
b7c851b2b8 docker: pass EXECUTABLE to build script
To build a docker image with which needs qemu linux-user emulation we
need to pass --include-executable to the build script. Using the same
mechanism as for other container controls we enable the option is
EXECUTABLE is set on the make command line e.g:

    make docker-image-debian-bootstrap V=1 J=9 DEB_ARCH=armhf \
        DEB_TYPE=stable EXECUTABLE=./arm-linux-user/qemu-arm

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-11-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Fam Zheng
ff31e2256d docker: Don't start a container that doesn't exist
Image building targets are dependencies of test running targets, so when
a docker image doesn't exist, it means it's skipped (due to dependency
checks in pre script). Therefore, skip the test too.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-10-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Fam Zheng
4b08af6019 docker: Add "images" subcommand to docker.py
This is a wrapper for the 'docker images' command.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-9-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Fam Zheng
1ad76b8af8 docker: Fix exit code if $CMD failed
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-8-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Fam Zheng
c81585130e docker: More sensible run script
It is very easy to figure out current directory and bash option from the
execution, so do less in the Makefile invocation command line, and
figure both options in the script.

This makes the next patch easier.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-7-git-send-email-famz@redhat.com
2016-07-20 19:19:43 +08:00
Alex Bennée
6e733da676 tests/docker/docker.py: add update operation
This adds a new operation to the docker script to allow updating of
binaries in an existing container. This is because it would be
inefficient to re-build the whole container just for an update to the
QEMU binary.

To update the executable run:

    ./tests/docker/docker.py update \
        debian:armhf ./arm-linux-user/qemu-arm

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-6-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
95c975013a tests/docker/dockerfiles: new debian-bootstrap.docker
Together with the debian-bootstrap.pre script can now build an arbitrary
architecture of Debian using debootstrap. This allows debootstrap to set
up its first stage before the container is built.

To build a container you need a command line like:

  DEB_ARCH=armhf DEB_TYPE=testing \
    ./tests/docker/docker.py build \
    --include-executable=arm-linux-user/qemu-arm debian:armhf \
    ./tests/docker/dockerfiles/debian-bootstrap.docker

Although a number of non-debian systems package the debootstrap script
it is fairly portable in itself. Assuming we have some sort of fakeroot
implementation we can just clone the upstream repository and use the
script from there.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-5-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
920776ea5e tests/docker/docker.py: check and run .pre script
The docker script will now search for an associated $dockerfile.pre
script which gets run in the same build context as the dockerfile will
be. This is to support pre-seeding the build context before running the
docker build.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-4-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
504ca3c208 tests/docker/docker.py: support --include-executable
When passed the path to a binary we copy it and any linked libraries (if
it is dynamically linked) into the docker build context. These can then
be included by a dockerfile with the line:

  # Copy all of context into container
  ADD . /

This is mainly intended for setting up foreign architecture docker
images which use qemu-$arch to do cross-architecture linux-user
execution. It also relies on the host and guest file-system following
reasonable multi-arch layouts so the copied libraries don't clash with
the guest ones.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1468934445-32183-3-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Alex Bennée
a9f8d03891 tests/docker/docker.py: docker_dir outside build
Instead of letting the build_image create the temporary working dir we
move the creation to the build command. This is preparation for the
later patches where additional files can be added to the build context
before the build step is run.

We also ensure we remove the build context after we are done (mkdtemp
doesn't do this automatically for you).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468934445-32183-2-git-send-email-famz@redhat.com
Signed-off-by: Fam Zheng <famz@redhat.com>
2016-07-20 19:19:43 +08:00
Peter Maydell
1ecfb24da9 Merge remote-tracking branch 'remotes/riku/tags/pull-linux-user-20160719-2' into staging
linux-user fixes before 2.7 freeze, fix commit message

# gpg: Signature made Tue 19 Jul 2016 14:18:54 BST
# gpg:                using RSA key 0xB44890DEDE3C9BC0
# gpg: Good signature from "Riku Voipio <riku.voipio@iki.fi>"
# gpg:                 aka "Riku Voipio <riku.voipio@linaro.org>"
# Primary key fingerprint: FF82 03C8 C391 98AE 0581  41EF B448 90DE DE3C 9BC0

* remotes/riku/tags/pull-linux-user-20160719-2:
  linux-user: AArch64 has sync_file_range, not sync_file_range2
  linux-user: Fix type for SIOCATMARK ioctl
  linux-user: define missing sparc syscalls
  linux-user: Fix terminal control ioctls
  linux-user: Add some new blk ioctls
  linux-user: Handle short lengths in host_to_target_sockaddr()
  linux-user: Forget about synchronous signal once it is delivered
  linux-user: Correct type for LOOP_GET_STATUS{,64} ioctls
  linux-user: Correct type for BLKSSZGET
  linux-user: Add loop control ioctls
  linux-user: Check sigsetsize argument to syscalls
  linux-user: add nested netlink types
  linux-user: convert sockaddr_ll from host to target
  linux-user: add fd_trans helper in do_recvfrom()
  linux-user: fix netlink memory corruption
  linux-user: fd_trans_*_data() returns the length

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-20 11:32:12 +01:00
Gerd Hoffmann
e0127d2eec qxl: fix qxl_set_dirty call in qxl_dirty_one_surface
qxl_set_dirty() expects start and end as range specification.
qxl_dirty_one_surface passes 'size' instead of 'offset + size' as end
parameter.  Fix that.  Also use uint64_t everywhere while being at it.

Bug was added by "e25139b qxl: set only off-screen surfaces dirty instead
of the whole vram" and carried forward unnoticed by "5cdc402 qxl: fix
surface migration".

Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 1468413187-22071-1-git-send-email-kraxel@redhat.com
2016-07-20 12:08:14 +02:00
Prasanna Kumar Kalever
6c7189bb29 block/gluster: add support for multiple gluster servers
This patch adds a way to specify multiple volfile servers to the gluster
block backend of QEMU with tcp|rdma transport types and their port numbers.

Problem:

Currently VM Image on gluster volume is specified like this:

file=gluster[+tcp]://host[:port]/testvol/a.img

Say we have three hosts in a trusted pool with replica 3 volume in action.
When the host mentioned in the command above goes down for some reason,
the other two hosts are still available. But there's currently no way
to tell QEMU about them.

Solution:

New way of specifying VM Image on gluster volume with volfile servers:
(We still support old syntax to maintain backward compatibility)

Basic command line syntax looks like:

Pattern I:
 -drive driver=gluster,
        volume=testvol,path=/path/a.raw,[debug=N,]
        server.0.type=tcp,
        server.0.host=1.2.3.4,
        server.0.port=24007,
        server.1.type=unix,
        server.1.socket=/path/socketfile

Pattern II:
 'json:{"driver":"qcow2","file":{"driver":"gluster",
       "volume":"testvol","path":"/path/a.qcow2",["debug":N,]
       "server":[{hostinfo_1}, ...{hostinfo_N}]}}'

   driver      => 'gluster' (protocol name)
   volume      => name of gluster volume where our VM image resides
   path        => absolute path of image in gluster volume
  [debug]      => libgfapi loglevel [(0 - 9) default 4 -> Error]

  {hostinfo}   => {{type:"tcp",host:"1.2.3.4"[,port=24007]},
                   {type:"unix",socket:"/path/sockfile"}}

   type        => transport type used to connect to gluster management daemon,
                  it can be tcp|unix
   host        => host address (hostname/ipv4/ipv6 addresses/socket path)
   port        => port number on which glusterd is listening.
   socket      => path to socket file

Examples:
1.
 -drive driver=qcow2,file.driver=gluster,
        file.volume=testvol,file.path=/path/a.qcow2,file.debug=9,
        file.server.0.type=tcp,
        file.server.0.host=1.2.3.4,
        file.server.0.port=24007,
        file.server.1.type=unix,
        file.server.1.socket=/var/run/glusterd.socket
2.
  'json:{"driver":"qcow2","file":{"driver":"gluster","volume":"testvol",
         "path":"/path/a.qcow2","debug":9,"server":
         [{"type":"tcp","host":"1.2.3.4","port":"24007"},
          {"type":"unix","socket":"/var/run/glusterd.socket"}
         ]}}'

This patch gives a mechanism to provide all the server addresses, which are in
replica set, so in case host1 is down VM can still boot from any of the
active hosts.

This is equivalent to the backup-volfile-servers option supported by
mount.glusterfs (FUSE way of mounting gluster volume)

credits: sincere thanks to all the supporters

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1468947453-5433-6-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:38:50 -04:00
Prasanna Kumar Kalever
7edac2ddeb block/gluster: using new qapi schema
this patch adds 'GlusterServer' related schema in qapi/block-core.json

[Jeff: minor fix-ups of comments and formatting, per patch reviews]

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468947453-5433-5-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:36:11 -04:00
Prasanna Kumar Kalever
0552ff2465 block/gluster: deprecate rdma support
gluster volfile server fetch happens through unix and/or tcp, it doesn't
support volfile fetch over rdma. The rdma code may actually mislead,
so to make sure things do not break, for now we fallback to tcp when requested
for rdma, with a warning.

If you are wondering how this worked all these days, its the gluster libgfapi
code which handles anything other than unix transport as socket/tcp, sad but
true.

Also gluster doesn't support ipv6 addresses, removing the ipv6 related
comments/docs section

[Jeff: Minor grammatical fixes in comments and commit message, per
review comments]

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-4-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:20:44 -04:00
Prasanna Kumar Kalever
f70c50c817 block/gluster: code cleanup
unified coding styles of multiline function arguments and other error functions
moved random declarations of structures and other list variables

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-3-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:08:12 -04:00
Prasanna Kumar Kalever
d5cf4079ca block/gluster: rename [server, volname, image] -> [host, volume, path]
A future patch will add support for multiple gluster servers. Existing
terminology is a bit unusual in relation to what names are used by
other networked devices, and doesn't map very well to the terminology
we expect to use for multiple servers.  Therefore, rename the following
options:
'server'  -> 'host'
'image'   -> 'path'
'volname' -> 'volume'

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1468947453-5433-2-git-send-email-prasanna.kalever@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:08:12 -04:00
Denis V. Lunev
cf56a3c632 mirror: fix request throttling in drive-mirror
There are 2 deficiencies here:
- mirror_iteration could start several requests inside. Thus we could
  simply have more in_flight requests than MAX_IN_FLIGHT.
- keeping this in mind throttling in mirror_run which is checking
  s->in_flight == MAX_IN_FLIGHT is wrong.

The patch adds the check and throttling into mirror_iteration and fixes
the check in mirror_run() to be sure.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 1466598927-5990-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit e648dc95c28fbca12e67be26a1fc4b9a0676c3fe)
2016-07-19 17:03:44 -04:00
Denis V. Lunev
4b5004d9fc mirror: improve performance of mirroring of empty disk
We should not take into account zero blocks for delay calculations.
They are not read and thus IO throttling is not required. In the
other case VM migration with 16 Tb QCOW2 disk with 4 Gb of data takes
days.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-9-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 17:02:49 -04:00
Denis V. Lunev
c7c2769c0e mirror: efficiently zero out target
With a bdrv_co_write_zeroes method on a target BDS and when this method
is working as indicated by the bdrv_can_write_zeroes_with_unmap(), zeroes
will not be placed into the wire. Thus the target could be very efficiently
zeroed out. This should be done with the largest chunk possible.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-8-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
b7d5062c9c mirror: optimize dirty bitmap filling in mirror_run a bit
There is no need to scan allocation tables if we have mark_all_dirty flag
set. Just mark it all dirty.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-7-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
2f0342efdb block: remove extra condition in bdrv_can_write_zeroes_with_unmap
All .bdrv_co_write_zeroes callbacks nowadays work perfectly even
with backing store attached. If future new callbacks would be unable to do
that - they have a chance to block this in bdrv_get_info().

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-6-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
c0b363ad43 mirror: create mirror_dirty_init helper for mirror_run
The code inside the helper will be extended in the next patch. mirror_run
itself is overbloated at the moment.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy<vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-5-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
49efb1f5b0 mirror: create mirror_throttle helper
The patch also places last_pause_ns from stack in mirror_run into
MirrorBlockJob structure. This helper will be useful in next patches.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468503209-19498-4-git-send-email-den@openvz.org
CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
CC: Eric Blake <eblake@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
531509ba28 mirror: make sectors_in_flight int64_t
We keep here the sum of int fields. Thus this could easily overflow,
especially when we will start sending big requests in next patches.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-3-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Denis V. Lunev
6d07859926 dirty-bitmap: operate with int64_t amount
Underlying HBitmap operates even with uint64_t. Thus this change is safe.
This would be useful f.e. to mark entire bitmap dirty in one call.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468503209-19498-2-git-send-email-den@openvz.org
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-07-19 16:54:46 -04:00
Eric Blake
0e55c381f6 net: Use correct type for bool flag
is_netdev is only used as a bool, so make it one.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-14-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 20:18:27 +02:00
Eric Blake
f394b2e20d qapi: Change Netdev into a flat union
This is a mostly-mechanical conversion that creates a new flat
union 'Netdev' QAPI type that covers all the branches of the
former 'NetClientOptions' simple union, where the branches are
now listed in a new 'NetClientDriver' enum rather than generated
from the simple union.  The existence of a flat union has no
change to the command line syntax accepted for new code, and
will make it possible for a future patch to switch the QMP
command to parse a boxed union for no change to valid QMP; but
it does have some ripple effect on the C code when dealing with
the new types.

While making the conversion, note that the 'NetLegacy' type
remains unchanged: it applies only to legacy command line options,
and will not be ported to QMP, so it should remain a wrapper
around a simple union; to avoid confusion, the type named
'NetClientOptions' is now gone, and we introduce 'NetLegacyOptions'
in its place.  Then, in the C code, we convert from NetLegacy to
Netdev as soon as possible, so that the bulk of the net stack
only has to deal with one QAPI type, not two.  Note that since
the old legacy code always rejected 'hubport', we can just omit
that branch from the new 'NetLegacyOptions' simple union.

Based on an idea originally by Zoltán Kővágó <DirtY.iCE.hu@gmail.com>:
Message-Id: <01a527fbf1a5de880091f98cf011616a78adeeee.1441627176.git.DirtY.iCE.hu@gmail.com>
although the sed script in that patch no longer applies due to
other changes in the tree since then, and I also did some manual
cleanups (such as fixing whitespace to keep checkpatch happy).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-13-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Fixup from Eric squashed in]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 20:18:02 +02:00
Peter Maydell
acd8279621 arm_gicv3: Add assert()s to tell Coverity that offsets are aligned
Coverity complains that the GICR_IPRIORITYR case in gicv3_readl()
can overflow an array, because it doesn't know that the offsets
passed to that function must be word aligned. Add some assert()s
which hopefully tell Coverity that this isn't possible.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1468261372-17508-1-git-send-email-peter.maydell@linaro.org
2016-07-19 17:56:27 +01:00
Peter Maydell
ff9d3e9cd9 target-arm: Fix unreachable code in gicv3_class_name()
Coverity complains that the exit() in gicv3_class_name()
can be unreachable, because if TARGET_AARCH64 is defined
then all code paths return before reaching it. Move the
exit() up to the error_report() that it belongs with.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1468260552-8400-1-git-send-email-peter.maydell@linaro.org
2016-07-19 17:56:27 +01:00
Peter Maydell
5d3217340a disas: Fix ATTRIBUTE_UNUSED define clash with ALSA headers
disas/bfd.h defines ATTRIBUTE_UNUSED, but unfortunately the
ALSA system headers also define this macro, which means that
you can get a compilation failure if building with ALSA and
any files happen to include the alsa headers before bfd.h
rather than the other way around.

This is unfortunate namespace pollution by the ALSA headers but
we can work around it. Add an #ifndef guard to bfd.h and remove
the unnecessary extra definition in disas/arm.c to fix this.

Reported-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468937076-21503-1-git-send-email-peter.maydell@linaro.org
2016-07-19 16:40:39 +01:00
Peter Maydell
a3b3437721 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* two old patches from prospective GSoC students
* i386 -kernel device tree support
* Coverity fix
* memory usage improvement from Peter
* checkpatch fix
* g_path_get_dirname cleanup
* caching of block status for iSCSI

# gpg: Signature made Tue 19 Jul 2016 07:43:41 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  target-i386: Remove redundant HF_SOFTMMU_MASK
  block/iscsi: allow caching of the allocation map
  block/iscsi: fix rounding in iscsi_allocationmap_set
  Move README to markdown
  cpu-exec: Move down some declarations in cpu_exec()
  exec: avoid realloc in phys_map_node_reserve
  checkpatch: consider git extended headers valid patches
  megasas: remove useless check for cmd->frame
  compiler: never omit assertions if using a static analysis tool
  hw/i386: add device tree support
  Changed malloc to g_malloc, free to g_free in bsd-user/qemu.h
  use g_path_get_dirname instead of dirname

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 15:08:05 +01:00
Peter Maydell
db3e07dbde Merge remote-tracking branch 'remotes/stsquad/tags/pull-travis-20160718-1' into staging
Make IRC a little less noisy

# gpg: Signature made Mon 18 Jul 2016 16:42:57 BST
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-travis-20160718-1:
  .travis.yml: Disable IRC build status updates from forks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 14:41:41 +01:00
Peter Maydell
c4e1cbd437 linux-user: AArch64 has sync_file_range, not sync_file_range2
The AArch64 Linux ABI syscall 84 is sync_file_range, not
sync_file_range2 (in the kernel it uses the asm-generic
headers and does not define __ARCH_WANT_SYNC_FILE_RANGE2).
Update our TARGET_NR_* definitions accordingly.

This fixes the sync_file_range syscall which otherwise
gets its arguments in the wrong order.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:18:11 +03:00
Peter Maydell
a57f1f8f52 linux-user: Fix type for SIOCATMARK ioctl
The SIOCATMARK ioctl takes an argument which should be a
pointer to an integer where the kernel will write the result.
We were incorrectly declaring it as TYPE_NULL which would mean
it would always fail (with EFAULT) when it should succeed.
Correct the type.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:18:11 +03:00
Laurent Vivier
74642d091a linux-user: define missing sparc syscalls
NR_lookup_dcookie, NR_fadvise64, NR_fadvise64_64

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:18:11 +03:00
Timothy Pearson
5b7f7bb39e linux-user: Fix terminal control ioctls
TIOCGPTN and related terminal control ioctls were not converted to the guest ioctl format on x86_64 targets. Convert these ioctls to enable terminal functionality on x86_64 guests.

Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 16:16:17 +03:00
Peter Maydell
1df8ffb286 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vnc-20160719-1' into staging
vnc: bugfixes for -rc0

# gpg: Signature made Tue 19 Jul 2016 08:27:05 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vnc-20160719-1:
  vnc-tight: fix regression with libxenstore
  vnc-enc-tight: fix off-by-one bug
  vnc: make sure we finish disconnect

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 14:07:27 +01:00
Peter Maydell
68ab919f0b Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Tue 19 Jul 2016 07:42:30 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to e79bca6 built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 13:41:31 +01:00
Peter Maydell
4715856a68 linux-user: Add some new blk ioctls
Add some new blk ioctls (these are 0x12,119 through
to 0x12,127). Several of these are used by mke2fs; this silences
the warnings:

mke2fs 1.42.12 (29-Aug-2014)
Unsupported ioctl: cmd=0x127b
Unsupported ioctl: cmd=0x127a
warning: Unable to get device geometry for /dev/loop5
Unsupported ioctl: cmd=0x127c
Unsupported ioctl: cmd=0x127c
Unsupported ioctl: cmd=0x1277

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:17 +03:00
Peter Maydell
a1e221929f linux-user: Handle short lengths in host_to_target_sockaddr()
If userspace specifies a short buffer for a target sockaddr,
the kernel will only copy in as much as it has space for
(or none at all if the length is zero) -- see the kernel
move_addr_to_user() function. Mimic this in QEMU's
host_to_target_sockaddr() routine.

In particular, this fixes a segfault running the LTP
recvfrom01 test, where the guest makes a recvfrom()
call with a bad buffer pointer and other parameters which
cause the kernel to set the addrlen to zero; because we
did not skip the attempt to swap the sa_family field we
segfaulted on the bad address.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:17 +03:00
Peter Maydell
31efaef1d9 linux-user: Forget about synchronous signal once it is delivered
Commit 655ed67c2a which switched synchronous signals to
benig recorded in ts->sync_signal rather than in a queue
with every other signal had a bug: we failed to clear
the flag indicating that a synchronous signal was pending
when we delivered it. This meant that we would take the signal
again and again every time the guest made a syscall.
(This is a bug introduced in my refactoring of Timothy Baldwin's
original code.)

Fix this by passing in the struct emulated_sigtable* to
handle_pending_signal(), so that we clear the pending flag
in the ts->sync_signal struct when handling a synchronous signal.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:16 +03:00
Peter Maydell
f2c2fb50be linux-user: Correct type for LOOP_GET_STATUS{,64} ioctls
The LOOP_GET_STATUS and LOOP_GET_STATUS64 ioctls were incorrectly
defined as IOC_W rather than IOC_R, which meant we weren't
correctly copying the information back from the kernel to the guest.
The loop_info64 structure definition was also missing a member
and using the wrong type for several 32-bit fields.

In particular, this meant that "kpartx -d image.img" didn't work
and "losetup -a" behaved strangely. Correct the ioctl type definitions.

Reported-by: Chanho Park <chanho61.park@samsung.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:16 +03:00
Peter Maydell
a4a2c51f90 linux-user: Correct type for BLKSSZGET
The BLKSSZGET ioctl takes an argument which is a pointer to an int.
We were incorrectly declaring it to take a pointer to a long, which
meant that we would incorrectly write to memory which we should not
if the guest is a 64-bit architecture.

In particular, kpartx uses this ioctl to write to an int on the
stack, which tends to result in it crashing immediately.

Reported-by: Chanho Park <chanho61.park@samsung.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:23:16 +03:00
Peter Maydell
884cdc48a9 linux-user: Add loop control ioctls
Add support for the /dev/loop-control ioctls:
 LOOP_CTL_ADD
 LOOP_CTL_REMOVE
 LOOP_CTL_GET_FREE

[RV: fixed to apply to new header guards]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:22:33 +03:00
Peter Maydell
c815701e81 linux-user: Check sigsetsize argument to syscalls
Many syscalls which take a sigset_t argument also take an argument
giving the size of the sigset_t.  The kernel insists that this
matches its idea of the type size and fails EINVAL if it is not.
Implement this logic in QEMU.  (This mostly just means some LTP test
cases which check error cases now pass.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
2016-07-19 15:20:59 +03:00
Laurent Vivier
c5dff280b8 linux-user: add nested netlink types
Nested types are used by the kernel to send link information and
protocol properties.

We can see following errors with "ip link show":

Unimplemented nested type 26
Unimplemented nested type 26
Unimplemented nested type 18
Unimplemented nested type 26
Unimplemented nested type 18
Unimplemented nested type 26

This patch implements nested types 18 (IFLA_LINKINFO) and
26 (IFLA_AF_SPEC).

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
a82ea9393d linux-user: convert sockaddr_ll from host to target
As we convert sockaddr for AF_PACKET family for sendto() (target to
host) we need also to convert this for getsockname() (host to target).

arping uses getsockname() to get the the interface address and uses
this address with sendto().

Tested with:

    /sbin/arping -D -q -c2 -I eno1 192.168.122.88

...
getsockname(3, {sa_family=AF_PACKET, proto=0x806, if2,
pkttype=PACKET_HOST, addr(6)={1, 10c37b6b9a76}, [18]) = 0
...
sendto(3, "..." 28, 0,
       {sa_family=AF_PACKET, proto=0x806, if2, pkttype=PACKET_HOST,
       addr(6)={1, ffffffffffff}, 20) = 28
...

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
c35e1f9c87 linux-user: add fd_trans helper in do_recvfrom()
Fix passwd using netlink audit.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
7d61d89232 linux-user: fix netlink memory corruption
Netlink is byte-swapping data in the guest memory (it's bad).

It's ok when the data come from the host as they are generated by the
host.

But it doesn't work when data come from the guest: the guest can
try to reuse these data whereas they have been byte-swapped.

This is what happens in glibc:

glibc generates a sequence number in nlh.nlmsg_seq and calls
sendto() with this nlh. In sendto(), we byte-swap nlmsg.seq.

Later, after the recvmsg(), glibc compares nlh.nlmsg_seq with
sequence number given in return, and of course it fails (hangs),
because nlh.nlmsg_seq is not valid anymore.

The involved code in glibc is:

sysdeps/unix/sysv/linux/check_pf.c:make_request()
...
  req.nlh.nlmsg_seq = time (NULL);
...
  if (TEMP_FAILURE_RETRY (__sendto (fd, (void *) &req, sizeof (req), 0,
                                    (struct sockaddr *) &nladdr,
                                    sizeof (nladdr))) < 0)
<here req.nlh.nlmsg_seq has been byte-swapped>
...
  do
    {
...
      ssize_t read_len = TEMP_FAILURE_RETRY (__recvmsg (fd, &msg, 0));
...
      struct nlmsghdr *nlmh;
      for (nlmh = (struct nlmsghdr *) buf;
           NLMSG_OK (nlmh, (size_t) read_len);
           nlmh = (struct nlmsghdr *) NLMSG_NEXT (nlmh, read_len))
        {
<we compare nlmh->nlmsg_seq with corrupted req.nlh.nlmsg_seq>
          if (nladdr.nl_pid != 0 || (pid_t) nlmh->nlmsg_pid != pid
              || nlmh->nlmsg_seq != req.nlh.nlmsg_seq)
            continue;
...
          else if (nlmh->nlmsg_type == NLMSG_DONE)
            /* We found the end, leave the loop.  */
            done = true;
        }
    }
  while (! done);

As we have a continue on "nlmh->nlmsg_seq != req.nlh.nlmsg_seq",
"done" cannot be set to "true" and we have an infinite loop.

It's why commands like "apt-get update" or "dnf update hangs".

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:59 +03:00
Laurent Vivier
ef759f6fcc linux-user: fd_trans_*_data() returns the length
fd_trans_target_to_host_data() and fd_trans_host_to_target_data() must
return the length of processed data.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
2016-07-19 15:20:58 +03:00
Peter Maydell
d25321f210 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Tue 19 Jul 2016 03:33:40 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  e1000e: fix building without CONFIG_VMXNET3_PCI
  MAINTAINERS: release Scott from being a rocker maintainer
  tap: fix memory leak on failure to create a multiqueue tap device
  net: fix incorrect argument to iov_to_buf
  net: fix incorrect access to pointer
  e1000e: fix incorrect access to pointer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 13:00:35 +01:00
Eric Blake
faecd40a59 block: Simplify drive-mirror
Now that we can support boxed commands, use it to greatly
reduce the number of parameters (and likelihood of getting
out of sync) when adjusting drive-mirror parameters.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-Id: <1468535878-3760-1-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:09 +02:00
Eric Blake
4dc9397b62 block: Simplify block_set_io_throttle
Now that we can support boxed commands, use it to greatly
reduce the number of parameters (and likelihood of getting
out of sync) when adjusting throttle parameters.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-Id: <1468468228-27827-11-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
c818408e44 qapi: Implement boxed types for commands/events
Turn on the ability to pass command and event arguments in
a single boxed parameter, which must name a non-empty type
(although the type can be a struct with all optional members).
For structs, it makes it possible to pass a single qapi type
instead of a breakout of all struct members (useful if the
arguments are already in a struct or if the number of members
is large); for other complex types, it is now possible to use
a union or alternate as the data for a command or event.

The empty type may be technically feasible if needed down the
road, but it's easier to forbid it now and relax things to allow
it later, than it is to allow it now and have to special case
how the generated 'q_empty' type is handled (see commit 7ce106a9
for reasons why nothing is generated for the empty type).  An
alternate type is never considered empty, but now that a boxed
type can be either an object or an alternate, we have to provide
a trivial QAPISchemaAlternateType.is_empty().  The new call to
arg_type.is_empty() during QAPISchemaCommand.check() requires
that we first check the type in question; but there is no chance
of introducing a cycle since objects do not refer back to commands.

We still have a split in syntax checking between ad-hoc parsing
up front (merely validates that 'boxed' has a sane value) and
during .check() methods (if 'boxed' is set, then 'data' must name
a non-empty user-defined type).

Generated code is unchanged, as long as no client uses the
new feature.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-10-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Test files renamed to *-boxed-*]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
48825ca419 qapi: Plumb in 'boxed' to qapi generator lower levels
The next patch will add support for passing a qapi union type
as the 'data' of a command.  But to do that, the user function
for implementing the command, as called by the generated
marshal command, must take the corresponding C struct as a
single boxed pointer, rather than a breakdown into one
parameter per member.  Even without a union, being able to use
a C struct rather than a list of parameters can make it much
easier to handle coding with QAPI.

This patch adds the internal plumbing of a 'boxed' flag
associated with each command and event.  In several cases,
this means adding indentation, with one new dead branch and
the remaining branch being the original code more deeply
nested; this was done so that the new implementation in the
next patch is easier to review without also being mixed with
indentation changes.

For this patch, no behavior or generated output changes, other
than the testsuite outputting the value of the new flag
(always False for now).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-9-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Identifier box renamed to boxed in two places]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
4d0b268fdb qapi-event: Simplify visit of non-implicit data
Commit 7ce106a9 documented why we don't generated a visit_type_FOO()
for implicit types; and therefore events with an anonymous type for
'data' have to open-code a visit.  Note that the open-coded visit in
qapi-event.c is slightly different from what is done in
qapi-visit.c for normal types, in part because we don't have to
check for *obj being NULL or free things on error.  But where the
type is not implicit, it is nicer to reuse the normal visit instead
of open-coding a duplicate.

At the moment, the only event with a non-implicit 'data' is in the
testsuite, where test-qapi-event.c changes as follows:

|@@ -155,6 +155,7 @@ void qapi_event_send___org_qemu_x_event(
|     __org_qemu_x_Struct param = {
|         __org_qemu_x_member1, (char *)__org_qemu_x_member2, has_q_wchar_t, q_wchar_t
|     };
|+    __org_qemu_x_Struct *arg = &param;
|
|     emit = qmp_event_get_func_emit();
|     if (!emit) {
|@@ -164,16 +165,7 @@ void qapi_event_send___org_qemu_x_event(
|     qmp = qmp_event_build_dict("__ORG.QEMU_X-EVENT");
|
|     v = qmp_output_visitor_new(&obj);
|-
|-    visit_start_struct(v, "__ORG.QEMU_X-EVENT", NULL, 0, &err);
|-    if (err) {
|-        goto out;
|-    }
|-    visit_type___org_qemu_x_Struct_members(v, &param, &err);
|-    if (!err) {
|-    if (!err) {
|-        visit_check_struct(v, &err);
|-    }
|-    visit_end_struct(v, NULL);
|+    visit_type___org_qemu_x_Struct(v, "__ORG.QEMU_X-EVENT", &arg, &err);
|     if (err) {
|         goto out;
|     }

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-8-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
fa274ed6fb qapi: Drop useless gen_err_check()
Ever since commit 12f254f removed the last parameterization
of gen_err_check(), it no longer makes sense to hide the three
lines of generated C code behind a macro call. Just inline it
into the remaining users.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-7-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
b616770682 qapi: Add type.is_empty() helper
In the near future, we want to lift our artificial restriction of
no variants at the top level of an event, at which point the
currently open-coded check for empty members will become
insufficient.  Factor it out into a new helper method is_empty()
now, and future-proof it by checking variants, too, along with an
assert that it is not used prior to the completion of .check().
Update places that were checking for (non-)empty .members to use
the new helper.

All of the current callers assert that there are no variants (either
directly, or by qapi.py asserting that base types have no variants),
so this is not a semantic change.

No change to generated code.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-6-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
da9cb19385 qapi: Hide tag_name data member of variants
Clean up the only remaining external use of the tag_name field of
QAPISchemaObjectTypeVariants, by explicitly listing the generated
'type' tag for all variants in the testsuite (you can still tell
simple unions by the -wrapper types).  Then we can mark the
tag_name field as private by adding a leading underscore to prevent
any further use.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-5-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
cd50a25645 qapi: Special case c_name() for empty type
Commit 7ce106a rendered QAPISchemaObjectType.c_name() redundant,
since it now does nothing more than delegate to its superclass.
However, rather than deleting it, we can restore part of the
assertion that was removed in that commit, to prove that we never
emit the empty type directly in generated code, but rather
special-case it as a built-in that makes other aspects of code
generation easier to reason about.

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-4-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Eric Blake
d0b182392d qapi: Require all branches of flat union enum to be covered
We were previously enforcing that all flat union branches were
found in the corresponding enum, but not that all enum values
were covered by branches.  The resulting generated code would
abort() if the user passes the uncovered enum value.

We don't automatically treat non-present branches in a flat
union as empty types, for symmetry with simple unions (there,
the enum type is generated from the list of all branches, so
there is no way to omit a branch but still have it be part of
the union).

A later patch will add shorthand so that branches that are empty
in flat unions can be declared as 'branch':{} instead of
'branch':'Empty', to avoid the need for an otherwise useless
explicit empty type.  [Such shorthand for simple unions is a bit
harder to justify, since we would still have to generate a
wrapper type that parses 'data':{}, rather than truly being an
empty branch with no additional siblings to the 'type' member.]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-3-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Kővágó, Zoltán
cebea51057 net: use Netdev instead of NetClientOptions in client init
This way we no longer need NetClientOptions and can convert Netdev
into a flat union.

Signed-off-by: Kővágó, Zoltán <DirtY.iCE.hu@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <93ffdfed7054529635e6acb935150d95dc173a12.1441627176.git.DirtY.iCE.hu@gmail.com>

[rework net_client_init1() to pass Netdev by copying from NetdevLegacy,
rather than merging the two types - which means that we still need
NetClientOptions after all.  Rebase to qapi changes. The bulk of the
patch is mechanical, replacing 'opts' by 'netdev->opts', while
net_client_init1() takes care of converting between legacy and modern
types.]

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-2-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Paolo Bonzini
3d344c2aab qapi: change QmpInputVisitor to QSLIST
This saves a lot of memory compared to a statically-sized array,
or at least 24kb could be considered a lot on an Atari ST.
It also makes the code more similar to QmpOutputVisitor.

This removes the limit on the depth of a QObject that can be processed
into a QAPI tree.  This is not a problem because QObjects can be
considered trusted; the text received on the QMP wire is untrusted
input, but the JSON parser already takes pains to limit the QObject tree
it creates.  We don't need the QMP input visitor to limit it again.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1467906798-5312-3-git-send-email-pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[Commit message typo fixed]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Paolo Bonzini
fc76ae8b38 qapi: change QmpOutputVisitor to QSLIST
This saves a little memory compared to the doubly-linked QTAILQ.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1467906798-5312-2-git-send-email-pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
[Comment tweaked to avoid long line]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-19 13:21:08 +02:00
Peter Maydell
ad31cd4c69 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Mon 18 Jul 2016 23:53:15 BST
# gpg:                using RSA key 0x7DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  block: ignore flush requests when storage is clean
  tests: in IDE and AHCI tests perform DMA write before flushing
  ide: set retry_unit for PIO and FLUSH requests
  ide: refactor retry_unit set and clear into separate function

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 11:47:07 +01:00
Peter Maydell
0c1b58f250 Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
# gpg: Signature made Mon 18 Jul 2016 22:59:55 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/tracing-pull-request:
  trace: Add QAPI/QMP interfaces to query and control per-vCPU tracing state
  trace: Allow event name pattern in "info trace-events"
  trace: Conditionally trace events based on their per-vCPU state
  trace: Add per-vCPU tracing states for events with the 'vcpu' property
  trace: Cosmetic changes on fast-path tracing
  disas: Remove unused macro '_'
  trace: Identify events with the 'vcpu' property
  trace: [bsd-user] Commandline arguments to control tracing
  trace: [linux-user] Commandline arguments to control tracing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 10:54:49 +01:00
Peter Maydell
08b558f07b Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20160718.0' into staging
VFIO update 2016-07-18

One fix for 2.7-rc0 which hides the ARI extended capability, fixing
multifunction support in PCIe configurations where the assigned device
function topology does not match the host (Alex Williamson)

# gpg: Signature made Mon 18 Jul 2016 18:02:27 BST
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-update-20160718.0:
  vfio/pci: Hide ARI capability

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-19 09:02:05 +01:00
Mark Cave-Ayland
4f194ce819 Update OpenBIOS images to e79bca6 built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-19 07:41:52 +01:00
Sergey Fedorov
da6d48e334 target-i386: Remove redundant HF_SOFTMMU_MASK
'HF_SOFTMMU_MASK' is only set when 'CONFIG_SOFTMMU' is defined. So
there's no need in this flag: test 'CONFIG_SOFTMMU' instead.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20160715175852.30749-6-sergey.fedorov@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-19 08:34:53 +02:00
Peter Lieven
e1123a3b40 block/iscsi: allow caching of the allocation map
until now the allocation map was used only as a hint if a cluster
is allocated or not. If a block was not allocated (or Qemu had
no info about the allocation status) a get_block_status call was
issued to check the allocation status and possibly avoid
a subsequent read of unallocated sectors. If a block known to be
allocated the get_block_status call was omitted. In the other case
a get_block_status call was issued before every read to avoid
the necessity for a consistent allocation map. To avoid the
potential overhead of calling get_block_status for each and
every read request this took only place for the bigger requests.

This patch enhances this mechanism to cache the allocation
status and avoid calling get_block_status for blocks where
the allocation status has been queried before. This allows
for bypassing the read request even for smaller requests and
additionally omits calling get_block_status for known to be
unallocated blocks.

Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <1468831940-15556-3-git-send-email-pl@kamp.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-19 08:34:53 +02:00
Peter Lieven
eb36b953e0 block/iscsi: fix rounding in iscsi_allocationmap_set
when setting clusters as alloacted the boundaries have
to be expanded. As Paolo pointed out the calculation of
the number of clusters is wrong:

Suppose cluster_sectors is 2, sector_num = 1, nb_sectors = 6:

In the "mark allocated" case, you want to set 0..8, i.e.
cluster_num=0, nb_clusters=4.

   0--.--2--.--4--.--6--.--8
   <--|_________________|-->  (<--> = expanded)

Instead you are setting nb_clusters=3, so that 6..8 is not marked.

   0--.--2--.--4--.--6--.--8
   <--|______________|!!!     (! = wrong)

Cc: qemu-stable@nongnu.org
Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <1468831940-15556-2-git-send-email-pl@kamp.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-19 08:34:53 +02:00
Pranith Kumar
e5dfc5e8e7 Move README to markdown
Move the README file to markdown so that it makes the github page look
prettier. I know that github repo is a mirror and not the official
repo, but I think it doesn't hurt to have it in markdown format.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20160715043111.29007-1-bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-19 08:34:53 +02:00
Evgeny Yakovlev
3ff2f67a7c block: ignore flush requests when storage is clean
Some guests (win2008 server for example) do a lot of unnecessary
flushing when underlying media has not changed. This adds additional
overhead on host when calling fsync/fdatasync.

This change introduces a write generation scheme in BlockDriverState.
Current write generation is checked against last flushed generation to
avoid unnessesary flushes.

The problem with excessive flushing was found by a performance test
which does parallel directory tree creation (from 2 processes).
Results improved from 0.424 loops/sec to 0.432 loops/sec.
Each loop creates 10^3 directories with 10 files in each.

This affected some blkdebug testcases that were expecting error logs from
failure-injected flushes which are now skipped entirely
(tests 026 071 089).

This also affects the performance of block jobs and thus BLOCK_JOB_READY
events for driver-mirror and active block-commit commands now arrives
faster, before QMP send successfully returns to caller (tests 141 144).

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468870792-7411-5-git-send-email-den@openvz.org
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: John Snow <jsnow@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2016-07-18 18:19:01 -04:00
Evgeny Yakovlev
2dd7e10d7c tests: in IDE and AHCI tests perform DMA write before flushing
Due to changes in flush behaviour clean disks stopped generating
flush_to_disk events and IDE and AHCI tests that test flush commands
started to fail.

This change adds additional DMA writes to affected tests before sending
flush commands so that bdrv_flush actually generates flush_to_disk event.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468870792-7411-4-git-send-email-den@openvz.org
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: John Snow <jsnow@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2016-07-18 18:19:01 -04:00
Evgeny Yakovlev
35f78ab469 ide: set retry_unit for PIO and FLUSH requests
The following sequence of tests discovered a problem in IDE emulation:
1. Send DMA write to IDE device 0
2. Send CMD_FLUSH_CACHE to same IDE device which will be failed by block
layer using blkdebug script in tests/ide-test:test_retry_flush

When doing DMA request ide/core.c will set s->retry_unit to s->unit in
ide_start_dma. When dma completes ide_set_inactive sets retry_unit to -1.
After that ide_flush_cache runs and fails thanks to blkdebug.
ide_flush_cb calls ide_handle_rw_error which asserts that s->retry_unit
== s->unit. But s->retry_unit is still -1 after previous DMA completion
and flush does not use anything related to retry.

This patch restricts retry unit assertion only to ops that actually use
retry logic.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468870792-7411-3-git-send-email-den@openvz.org
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: John Snow <jsnow@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2016-07-18 18:19:01 -04:00
Evgeny Yakovlev
0eeee07e24 ide: refactor retry_unit set and clear into separate function
Code to set and clear state associated with retry in moved into
ide_set_retry and ide_clear_retry to make adding retry setups easier.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1468870792-7411-2-git-send-email-den@openvz.org
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: John Snow <jsnow@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
2016-07-18 18:19:01 -04:00
Lluís Vilanova
77e2b17272 trace: Add QAPI/QMP interfaces to query and control per-vCPU tracing state
Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:23:12 +01:00
Lluís Vilanova
bd71211d55 trace: Allow event name pattern in "info trace-events"
Homogenizes the command capabilities with QMP.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:23:12 +01:00
Lluís Vilanova
40b9cd25f7 trace: Conditionally trace events based on their per-vCPU state
Events with the 'vcpu' property are conditionally emitted according to
their per-vCPU state. Other events are emitted normally based on their
global tracing state.

Note that the per-vCPU condition check applies to all tracing backends.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:23:12 +01:00
Lluís Vilanova
4815185902 trace: Add per-vCPU tracing states for events with the 'vcpu' property
Each vCPU gets a 'trace_dstate' bitmap to control the per-vCPU dynamic
tracing state of events with the 'vcpu' property.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:23:12 +01:00
Lluís Vilanova
e1d6e0a4c0 trace: Cosmetic changes on fast-path tracing
Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:13:54 +01:00
Lluís Vilanova
ca66f1a174 disas: Remove unused macro '_'
Eliminates a future compilation error when UI code includes the tracing
headers (indirectly pulling "disas/bfd.h" through "qom/cpu.h") and
GLib's i18n '_' macro.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:13:54 +01:00
Lluís Vilanova
17f7ac75df trace: Identify events with the 'vcpu' property
A new event attribute 'cpu_id' is added to have a separate ID
space ('TRACE_VCPU_*') for all events with the 'vcpu' property.

These are later used to identify which events are enabled on each vCPU.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:13:54 +01:00
Lluís Vilanova
6913e79c36 trace: [bsd-user] Commandline arguments to control tracing
[Changed const char *trace_file to char *trace_file since it's a
heap-allocated string that needs to be freed.  This type is also
returned by trace_opt_parse() and used in vl.c.

Also fixed coding style on for(;;) and else statement as suggested by
Eric Blake <eblake@redhat.com> since the patch modifies these lines or
close enough.
--Stefan]

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 146860252322.30668.18276041739086338328.stgit@fimbulvetr.bsc.es
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:13:37 +01:00
Lluís Vilanova
6533dd6e11 trace: [linux-user] Commandline arguments to control tracing
[Changed const char *trace_file to char *trace_file since it's a
heap-allocated string that needs to be freed.  This type is also
returned by trace_opt_parse() and used in vl.c.
--Stefan]

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Message-id: 146860251784.30668.17339867835129075077.stgit@fimbulvetr.bsc.es
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 18:13:37 +01:00
Peter Maydell
a098fbc025 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Mon 18 Jul 2016 17:58:27 BST
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  MAINTAINERS: Add include/block/aio.h to block I/O path section
  virtio-blk: dataplane cleanup
  checkpatch: consider git extended headers valid patches
  aio-posix: remove useless parameter
  linux-aio: prevent submitting more than MAX_EVENTS
  aio_ctx_check: follow CODING_STYLE
  linux-aio: share one LinuxAioState within an AioContext
  spec/parallels: fix a mistake

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-18 18:13:01 +01:00
Alex Williamson
383a7af7ec vfio/pci: Hide ARI capability
QEMU supports ARI on downstream ports and assigned devices may support
ARI in their extended capabilities.  The endpoint ARI capability
specifies the next function, such that the OS doesn't need to walk
each possible function, however this next function is relative to the
host, not the guest.  This leads to device discovery issues when we
combine separate functions into virtual multi-function packages in a
guest.  For example, SR-IOV VFs are not enumerated by simply probing
the function address space, therefore the ARI next-function field is
zero.  When we combine multiple VFs together as a multi-function
device in the guest, the guest OS identifies ARI is enabled, relies on
this next-function field, and stops looking for additional function
after the first is found.

Long term we should expose the ARI capability to the guest to enable
configurations with more than 8 functions per slot, but this requires
additional QEMU PCI infrastructure to manage the next-function field
for multiple, otherwise independent devices.  In the short term,
hiding this capability allows equivalent functionality to what we
currently have on non-express chipsets.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-07-18 10:55:17 -06:00
Pranith Kumar
cb4c2536d5 .travis.yml: Disable IRC build status updates from forks
We want the travis build bot to post notifications on IRC only for the
master qemu repository and not the various forks/branches of
others. Currently there is no direct option to restrict the updates to
one repository. This is being worked upon by the developers and
tracked in https://github.com/travis-ci/travis-ci/issues/1094.

Until such time, we can use the workaround as posted in
ref. https://github.com/facebook/flow/pull/1822.

This basically creates an ecrypted string which decrypts to qemu IRC
channel only on "qemu/qemu" repo and not on the forks. This enables
the build bot to notify the IRC only for the main repo.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2016-07-18 16:40:58 +01:00
Fam Zheng
e1029ae26d MAINTAINERS: Add include/block/aio.h to block I/O path section
This file is actually the header for async.c and aio-*.c., so add it to
the same section.

Suggested-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 1468826387-10473-1-git-send-email-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:10:52 +01:00
Cao jin
ab3b9c1be8 virtio-blk: dataplane cleanup
No need duplicate the judgment, there is one in function entry.

Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1468814749-14510-1-git-send-email-caoj.fnst@cn.fujitsu.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:10:52 +01:00
Stefan Hajnoczi
f8dccbb634 checkpatch: consider git extended headers valid patches
Renames look like this with git-diff(1) when diff.renames = true is set:

  diff --git a/a b/b
  similarity index 100%
  rename from a
  rename to b

This raises the "Does not appear to be a unified-diff format patch"
error because checkpatch.pl only considers a diff valid if it contains
at least one "@@" hunk.

This patch accepts renames and copies too so that checkpatch.pl exits
successfully when a diff only renames/copies files.  The git diff
extended header format is described on the git-diff(1) man page.

Reported-by: Colin Lord <clord@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468576014-28788-1-git-send-email-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:10:52 +01:00
Cao jin
7e00346505 aio-posix: remove useless parameter
Parameter **errp of aio_context_setup() is useless, remove it
and clean up the related code.

Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Fam Zheng <famz@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1468578524-23433-1-git-send-email-caoj.fnst@cn.fujitsu.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:10:52 +01:00
Roman Pen
5e1b34a3fa linux-aio: prevent submitting more than MAX_EVENTS
Invoking io_setup(MAX_EVENTS) we ask kernel to create ring buffer for us
with specified number of events.  But kernel ring buffer allocation logic
is a bit tricky (ring buffer is page size aligned + some percpu allocation
are required) so eventually more than requested events number is allocated.

From a userspace side we have to follow the convention and should not try
to io_submit() more or logic, which consumes completed events, should be
changed accordingly.  The pitfall is in the following sequence:

    MAX_EVENTS = 128
    io_setup(MAX_EVENTS)

    io_submit(MAX_EVENTS)
    io_submit(MAX_EVENTS)

    /* now 256 events are in-flight */

    io_getevents(MAX_EVENTS) = 128

    /* we can handle only 128 events at once, to be sure
     * that nothing is pended the io_getevents(MAX_EVENTS)
     * call must be invoked once more or hang will happen. */

To prevent the hang or reiteration of io_getevents() call this patch
restricts the number of in-flights, which is now limited to MAX_EVENTS.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468415004-31755-1-git-send-email-roman.penyaev@profitbricks.com
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: qemu-devel@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:10:52 +01:00
Cao jin
6977d901c4 aio_ctx_check: follow CODING_STYLE
replace tab with spaces

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-id: 1468501843-14927-1-git-send-email-caoj.fnst@cn.fujitsu.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:10:52 +01:00
Paolo Bonzini
0187f5c9cb linux-aio: share one LinuxAioState within an AioContext
This has better performance because it executes fewer system calls
and does not use a bottom half per disk.

Originally proposed by Ming Lei.

[Changed #include "raw-aio.h" to "block/raw-aio.h" in win32-aio.c to fix
build error as reported by Peter Maydell <peter.maydell@linaro.org>.
--Stefan]

Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1467650000-51385-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

squash! linux-aio: share one LinuxAioState within an AioContext
2016-07-18 15:09:31 +01:00
Vladimir Sementsov-Ogievskiy
4e90ccc28e spec/parallels: fix a mistake
We have only one flag for now - Empty Image flag. The patch fixes unused
bits specification and marks bit 1 as usused.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-18 15:09:31 +01:00
Peter Maydell
3913d3707e Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160718' into staging
ppc patch queue 2016-07-18

Here's what ought to be the final ppc pull request before the 2.7 hard
freeze.  This set contains a rework of the DBDMA device for Mac
platforms, and some assorted cleanups and bugfixes.

# gpg: Signature made Mon 18 Jul 2016 05:35:27 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160718:
  ppc: Yet another fix for the huge page support detection mechanism
  target-ppc: fix left shift overflow in hpte_page_shift
  ppc/mmu-hash64: Remove duplicated #include statement
  ppc: abort if compat property contains an unknown value
  spapr: Ensure CPU cores are added contiguously and removed in LIFO order
  vfio/spapr: Remove stale ioctl() call
  ppc: Fix support for odd MSR combinations
  dbdma: reset io->processing flag for unassigned DBDMA channel rw accesses
  dbdma: set FLUSH bit upon reception of flush command for unassigned DBDMA channels
  dbdma: fix load_word/store_word value endianness
  dbdma: fix endian of DBDMA_CMDPTR_LO during branch
  dbdma: add per-channel debugging enabled via DEBUG_DBDMA_CHANMASK
  dbdma: always define DBDMA_DPRINTF and enable debug with DEBUG_DBDMA
  spapr: fix core unplug crash

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-18 11:24:15 +01:00
Jason Wang
103916cbe9 e1000e: fix building without CONFIG_VMXNET3_PCI
e1000e needs net_tx_pkt.o and net_rx_pkt.o too.

Cc: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Cc: Leonid Bloch <leonid.bloch@ravellosystems.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-18 16:17:02 +08:00
Jiri Pirko
a77f0a09fe MAINTAINERS: release Scott from being a rocker maintainer
As requested by Scott, removing him.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-18 16:16:58 +08:00
Paolo Bonzini
091a6b2acf tap: fix memory leak on failure to create a multiqueue tap device
Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-18 16:16:56 +08:00
Paolo Bonzini
4555ca6816 net: fix incorrect argument to iov_to_buf
Coverity reports a "suspicious sizeof" which is indeed wrong.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-18 16:16:52 +08:00
Paolo Bonzini
2c5e564f4d net: fix incorrect access to pointer
This is not dereferencing the pointer, and instead checking only
the value of the pointer.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-18 16:16:49 +08:00
Paolo Bonzini
1ac6c07f42 e1000e: fix incorrect access to pointer
This is not dereferencing the pointer, and instead checking only
the value of the pointer.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-18 16:16:20 +08:00
Thomas Huth
159d2e39a8 ppc: Yet another fix for the huge page support detection mechanism
Commit 86b50f2e1b ("Disable huge page support if it is not available
for main RAM") already made sure that huge page support is not announced
to the guest if the normal RAM of non-NUMA configurations is not backed
by a huge page filesystem. However, there is one more case that can go
wrong: NUMA is enabled, but the RAM of the NUMA nodes are not configured
with huge page support (and only the memory of a DIMM is configured with
it). When QEMU is started with the following command line for example,
the Linux guest currently crashes because it is trying to use huge pages
on a memory region that does not support huge pages:

 qemu-system-ppc64 -enable-kvm ... -m 1G,slots=4,maxmem=32G -object \
   memory-backend-file,policy=default,mem-path=/hugepages,size=1G,id=mem-mem1 \
   -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
   -numa node,nodeid=0 -numa node,nodeid=1

To fix this issue, we've got to make sure to disable huge page support,
too, when there is a NUMA node that is not using a memory backend with
huge page support.

Fixes: 86b50f2e1b
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:52:19 +10:00
Paolo Bonzini
b56d417b8d target-ppc: fix left shift overflow in hpte_page_shift
ps->pte_enc is a 32-bit value, which is shifted left and then compared
to a 64-bit value.  It needs a cast before the shift.

Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:45:44 +10:00
Thomas Huth
28f3331887 ppc/mmu-hash64: Remove duplicated #include statement
No need to include error-report.h twice here.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Greg Kurz
c4dfc14b55 ppc: abort if compat property contains an unknown value
It is not possible to set the compat property to an unknown value with
powerpc_set_compat(). Something must have gone terribly wrong in QEMU,
if we detect an "Internal error" in powerpc_get_compat(). Let's abort then.

This patch also drops the "max_compat ? *max_compat : -1" construct. It is
useless since max_compat is dereferenced a few lines above.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Bharata B Rao
5cbc64de25 spapr: Ensure CPU cores are added contiguously and removed in LIFO order
If CPU core addition or removal is allowed in random order leading to
holes in the core id range (and hence in the cpu_index range), migration
can fail as migration with holes in cpu_index range isn't yet handled
correctly.

Prevent this situation by enforcing the addition in contiguous order
and removal in LIFO order so that we never end up with holes in
cpu_index range.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
David Gibson
21bb3093e6 vfio/spapr: Remove stale ioctl() call
This ioctl() call to VFIO_IOMMU_SPAPR_TCE_REMOVE was left over from an
earlier version of the code and has since been folded into
vfio_spapr_remove_window().

It wasn't caught because although the argument structure has been removed,
the libc function remove() means this didn't trigger a compile failure.
The ioctl() was also almost certain to fail silently and harmlessly with
the bogus argument, so this wasn't caught in testing.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
2016-07-18 10:40:27 +10:00
Benjamin Herrenschmidt
36a24df84a ppc: Fix support for odd MSR combinations
MacOS uses an architecturally illegal MSR combination that
seems nonetheless supported by 32-bit processors, which is
to have MSR[PR]=1 and one or more of MSR[DR/IR/EE]=0.

This adds support for it. To work properly we need to also
properly include support for PR=1,{I,D}R=0 to the MMU index
used by the qemu TLB.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Mark Cave-Ayland
2df778967b dbdma: reset io->processing flag for unassigned DBDMA channel rw accesses
Otherwise MacOS 9 hangs upon shutdown.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Mark Cave-Ayland
894993905d dbdma: set FLUSH bit upon reception of flush command for unassigned DBDMA channels
This fixes MacOS 9 whereby it continually flushes and polls the status bits
until they are set to indicate a successful flush.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Mark Cave-Ayland
e12f50b900 dbdma: fix load_word/store_word value endianness
The values to read/write to/from physical memory are copied directly to the
physical address with no endian swapping required.

Also add some extra information to debugging output while we are here.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Mark Cave-Ayland
3f0d4128dc dbdma: fix endian of DBDMA_CMDPTR_LO during branch
The current DBDMA command is stored in little-endian format, so make sure
we convert it to match our CPU when updating the DBDMA_CMDPTR_LO register.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Mark Cave-Ayland
3e49c43940 dbdma: add per-channel debugging enabled via DEBUG_DBDMA_CHANMASK
By default large amounts of DBDMA debugging are produced when often it is just
1 or 2 channels that are of interest. Introduce DEBUG_DBDMA_CHANMASK to allow
the developer to select the channels of interest at compile time, and then
further add the extra channel information to each debug statement where
possible.

Also clearly mark the start/end of DBDMA_run_bh to allow tracking the bottom
half execution.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Mark Cave-Ayland
ba0b17dd8f dbdma: always define DBDMA_DPRINTF and enable debug with DEBUG_DBDMA
Enabling DBDMA_DPRINTF unconditionally ensures that any errors in debug
statements are picked up immediately.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Greg Kurz
44d691f7d9 spapr: fix core unplug crash
If the host has 8 threads/core and the guest is started with:

-smp cores=1,threads=4,maxcpus=12

It is possible to crash QEMU by doing:

(qemu) device_add host-spapr-cpu-core,core-id=16,id=foo
(qemu) device_del foo
Segmentation fault

This happens because spapr_core_unplug() assumes cpu_dt_id == core_id.
As long as cpu_dt_id is derived from the non-table cpu_index, this is
only true when you plug cores with contiguous ids.

It is safer to be consistent: the DR connector was created with an
index that is immediately written to cc->core_id, and spapr_core_plug()
also relies on cc->core_id.

Let's use it also in spapr_core_unplug().

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-18 10:40:27 +10:00
Sergey Fedorov
ca7d8e1c9c cpu-exec: Move down some declarations in cpu_exec()
This will fix a compiler warning with -Wclobbered:

http://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg03347.html

Reported-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
Message-Id: <20160715193123.28113-1-sergey.fedorov@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:22 +02:00
Peter Lieven
101420b886 exec: avoid realloc in phys_map_node_reserve
this is the first step in reducing the brk heap fragmentation
created by the map->nodes memory allocation. Since the introduction
of RCU the freeing of the PhysPageMaps is delayed so that sometimes
several hundred are allocated at the same time.

Even worse the memory for map->nodes is allocated and shortly
afterwards reallocated. Since the number of nodes it grows
to in the end is the same for all PhysPageMaps remember this value
and at least avoid the reallocation.

The large number of simultaneous allocations (about 450 x 70kB in
my configuration) has to be addressed later.

Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <1468577030-21097-1-git-send-email-pl@kamp.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Stefan Hajnoczi
d211bd6016 checkpatch: consider git extended headers valid patches
Renames look like this with git-diff(1) when diff.renames = true is set:

  diff --git a/a b/b
  similarity index 100%
  rename from a
  rename to b

This raises the "Does not appear to be a unified-diff format patch"
error because checkpatch.pl only considers a diff valid if it contains
at least one "@@" hunk.

This patch accepts renames and copies too so that checkpatch.pl exits
successfully when a diff only renames/copies files.  The git diff
extended header format is described on the git-diff(1) man page.

Reported-by: Colin Lord <clord@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1468576014-28788-1-git-send-email-stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Paolo Bonzini
8cc46787b5 megasas: remove useless check for cmd->frame
megasas_enqueue_frame always returns with non-NULL cmd->frame.
Remove the "else" part as it is dead code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Paolo Bonzini
8bff06a0bb compiler: never omit assertions if using a static analysis tool
Assertions help both Coverity and the clang static analyzer avoid
false positives, but on the other hand both are confused when
the condition is compiled as (void)(x != FOO).  Always expand
assertion macros when using Coverity or clang, through a new
QEMU_STATIC_ANALYSIS preprocessor symbol.

This fixes a couple false positives in TCG.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Antonio Borneo
3cbeb52467 hw/i386: add device tree support
With "-dtb" on command-line:
- append the device tree blob to the kernel image;
- pass the blob's pointer to the kernel through setup_data, as
  requested by upstream kernel commit da6b737b9ab7 ("x86: Add
  device tree support").

The device tree blob is passed as-is to the guest; none of its
fields is modified nor updated. This is not an issue; the kernel
commit above uses the device tree only as an extension to the
traditional kernel configuration.

To: "Michael S. Tsirkin" <mst@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
To: Richard Henderson <rth@twiddle.net>
To: Eduardo Habkost <ehabkost@redhat.com>
Cc: qemu-devel@nongnu.org
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Antonio Borneo <borneo.antonio@gmail.com>
Message-Id: <1459973054-2777-1-git-send-email-borneo.antonio@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Md Haris Iqbal
fd9a304830 Changed malloc to g_malloc, free to g_free in bsd-user/qemu.h
Signed-off-by: Md Haris Iqbal <haris.phnx@gmail.com>
Message-Id: <1459861743-4514-1-git-send-email-haris.phnx@gmail.com>
Reviewed-by: Sean Bruno <sbruno@freebsd.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Wei Jiangang
55ad781ca7 use g_path_get_dirname instead of dirname
Use g_path_get_basename to get the directory components of
a file name, and free its return when no longer needed.

Signed-off-by: Wei Jiangang <weijg.fnst@cn.fujitsu.com>
Message-Id: <1459997185-15669-3-git-send-email-weijg.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-17 09:59:21 +02:00
Peter Maydell
6b92bbfe81 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
Update OpenBIOS images

# gpg: Signature made Fri 15 Jul 2016 15:22:36 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  Update OpenBIOS images to b747b6a built from submodule.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-15 16:56:08 +01:00
Mark Cave-Ayland
5cebd885d0 Update OpenBIOS images to b747b6a built from submodule.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-15 15:18:15 +01:00
Peter Lieven
66668d197f vnc-tight: fix regression with libxenstore
commit 095497ff added thread local storage for the color counting
palette. Unfortunately, a VncPalette is about 7kB on a x86_64 system.
This memory is reserved from the stack of every thread and it
exhausted the stack space of a libxenstore thread.

Fix this by allocating memory only for the VNC encoding thread.

Fixes: 095497ffc6
Reported-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-id: 1468575911-20656-1-git-send-email-pl@kamp.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-15 12:11:55 +02:00
Herongguang (Stephen)
3f7e51bca3 vnc-enc-tight: fix off-by-one bug
In tight_encode_indexed_rect32, buf(or src)’s size is count. In for loop,
the logic is supposed to be that i is an index into src, i should be
incremented when incrementing src.

This is broken when src is incremented but i is not before while loop,
resulting in off-by-one bug in while loop.

Signed-off-by: He Rongguang <herongguang.he@huawei.com>
Message-id: 5784B8EB.7010008@huawei.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-15 12:11:55 +02:00
Gerd Hoffmann
5a8be0f73d vnc: make sure we finish disconnect
It may happen that vnc connections linger in disconnecting state forever
because VncState happens to be in a state where vnc_update_client()
exists early and never reaches the vnc_disconnect_finish() call at the
bottom of the function.  Fix that by doing an additinal check at the
start of the function.

https://bugzilla.redhat.com/show_bug.cgi?id=1352799

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1468405280-2571-1-git-send-email-kraxel@redhat.com
2016-07-15 12:00:06 +02:00
Peter Maydell
14c7d99333 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160714' into staging
target-arm queue:
 * add virtio-mmio transport base address to device path
   (avoid an assertion failure with multiple virtio-scsi-devices)
 * revert hw/ptimer commit 5a50307 which causes regressions on
   SPARC guests
 * use Neon to accelerate zero-page checking on AArch64 hosts
 * set the MPIDR for TCG to match how KVM does it (and fit with
   GICv2/GICv3 restrictions on SGI target lists)
 * add some missing AArch32 TLBI hypervisor TLB operations
 * m25p80: Fix QIOR/DIOR handling for Winbond
 * hw/misc: fix typo in Aspeed SCU hw-strap2 property name
 * ast2400: pretend DMAs are done for U-boot
 * ast2400: some minor code cleanups

# gpg: Signature made Thu 14 Jul 2016 17:21:30 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160714:
  ast2400: externalize revision numbers
  ast2400: pretend DMAs are done for U-boot
  ast2400: replace aspeed_smc_is_implemented()
  hw/misc: fix typo in Aspeed SCU hw-strap2 property name
  m25p80: Fix QIOR/DIOR handling for Winbond
  target-arm: Add missed AArch32 TLBI sytem registers
  hw/arm/virt: tcg: adjust MPIDR like KVM
  gic: provide defines for v2/v3 targetlist sizes
  target-arm: Use Neon for zero checking
  Revert "hw/ptimer: Perform counter wrap around if timer already expired"
  virtio-mmio: format transport base address in BusClass.get_dev_path

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 17:32:53 +01:00
Cédric Le Goater
79a9f323a8 ast2400: externalize revision numbers
AST2400_A0_SILICON_REV is defined twice. Fix this by including the
definition in the header file as well as the routine to check if a
silicon revision is supported. It will useful to reuse in other
controllers.

Let's add also AST2500_A0_SILICON_REV for future use.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467994016-11678-5-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:39 +01:00
Cédric Le Goater
2e1f05020b ast2400: pretend DMAs are done for U-boot
U-boot does SPI timing calibration using DMA tranfers. To let the
initialization continue, we fake success by setting the DMA status of
the Interrupt Control Register.

For the moment, DMA support is not required as it is not used in
normal operation.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467994016-11678-4-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:38 +01:00
Cédric Le Goater
97c2ed5dbd ast2400: replace aspeed_smc_is_implemented()
aspeed_smc_is_implemented() filters invalid registers in a peculiar
way. Let's remove it and open code the if conditions. It serves the
same purpose, the aesthetic is better, and new registers can easily be
added.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467994016-11678-3-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:38 +01:00
Cédric Le Goater
2ddfa2817b hw/misc: fix typo in Aspeed SCU hw-strap2 property name
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467994016-11678-2-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:38 +01:00
Marcin Krzeminski
fe84770528 m25p80: Fix QIOR/DIOR handling for Winbond
Winbond also support continuous read mode, but as an opposite for other
flash type read mode clock cycles are included to dummy cycles number.
This path add proper handling of read mode byte and update needed
dummy cycles. QPI mode and dummy cycles configuration are not supported.

Signed-off-by: Marcin Krzeminski <marcin.krzeminski@nokia.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467809036-6986-1-git-send-email-marcin.krzeminski@nokia.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:38 +01:00
Sergey Sorokin
541ef8c2e7 target-arm: Add missed AArch32 TLBI sytem registers
Some PL2 related TLBI system registers are missed in AArch32
implementation. The patch fixes it.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
Message-id: 1468328885-3217862-1-git-send-email-afarallax@yandex.ru
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:37 +01:00
Andrew Jones
95eb49c8a3 hw/arm/virt: tcg: adjust MPIDR like KVM
KVM adjusts the MPIDR of guest vcpus based on the architecture of
the host, 32-bit vs. 64-bit, and, for 64-bit, also on the type of
GIC the guest is using. To be consistent and improve SGI efficiency
we make the same adjustments for TCG as 64-bit KVM hosts. We neglect
to add consistency with 32-bit KVM hosts, as that would reduce SGI
efficiency and KVM is expected to change.

As MPIDR is a system register, and thus guest visible, we only make
adjustments for current and later versioned machines.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-id: 1467378129-23302-3-git-send-email-drjones@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:37 +01:00
Andrew Jones
c8efd802c4 gic: provide defines for v2/v3 targetlist sizes
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-id: 1467378129-23302-2-git-send-email-drjones@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:37 +01:00
Vijay
7069532e3b target-arm: Use Neon for zero checking
Use Neon instructions to perform zero checking of
buffer. This is helps in reducing total migration time.

Use case: Idle VM live migration with 4 VCPUS and 8GB ram
running CentOS 7.

Without Neon, the Total migration time is 3.5 Sec

Migration status: completed
total time: 3560 milliseconds
downtime: 33 milliseconds
setup: 5 milliseconds
transferred ram: 297907 kbytes
throughput: 685.76 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2062760 pages
skipped: 0 pages
normal: 69808 pages
normal bytes: 279232 kbytes
dirty sync count: 3

With Neon, the total migration time is 2.9 Sec

Migration status: completed
total time: 2960 milliseconds
downtime: 65 milliseconds
setup: 4 milliseconds
transferred ram: 299869 kbytes
throughput: 830.19 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2064313 pages
skipped: 0 pages
normal: 70294 pages
normal bytes: 281176 kbytes
dirty sync count: 3

Signed-off-by: Vijaya Kumar K <vijayak@cavium.com>
Signed-off-by: Suresh <ksuresh@cavium.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1467190029-694-2-git-send-email-vijayak@cavium.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:36 +01:00
Dmitry Osipenko
56215da394 Revert "hw/ptimer: Perform counter wrap around if timer already expired"
Software should see timer counter wraparound only after IRQ being triggered.
This fixes regression introduced by the commit 5a50307 ("hw/ptimer: Perform
counter wrap around if timer already expired"), resulting in monotonic timer
jumping backwards on SPARC emulated machine running NetBSD guest OS, as
reported by Mark Cave-Ayland.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Message-id: 20160708132206.2080-1-digetx@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:36 +01:00
Laszlo Ersek
f58b39d2d5 virtio-mmio: format transport base address in BusClass.get_dev_path
At the moment the following QEMU command line triggers an assertion
failure (minimal reproducer by Cole):

  qemu-system-aarch64 \
    -machine virt-2.6,accel=tcg \
    -nodefaults \
    -no-user-config \
    -nographic -monitor stdio \
    -device virtio-scsi-device,id=scsi0 \
    -device virtio-scsi-device,id=scsi1 \
    -drive file=foo.img,format=raw,if=none,id=d0 \
    -device scsi-hd,bus=scsi0.0,drive=d0 \
    -drive file=foo.img,format=raw,if=none,id=d1 \
    -device scsi-hd,bus=scsi1.0,drive=d1

  qemu-system-aarch64: migration/savevm.c:615:
  vmstate_register_with_alias_id:
  Assertion `!se->compat || se->instance_id == 0' failed.

The reason is that the vmstate sections for the two scsi-hd devices are
not uniquely identifiable by name.

The direct parent buses of the scsi-hd devices -- scsi0.0 and scsi1.0 --
support the BusClass.get_dev_path member function. scsibus_get_dev_path()
formats a device path prefix with the help of its topologically parent
bus, and then appends the chan🆔lun triplet to it. For both scsi-hd
devices, this triplet is 0:0:0.

(Here we use "device path" in the QEMU migration sense, for vmstate
section identification, not in the OFW or UEFI device path senses.)

The virtio-scsi HBA is plugged into the virtio-mmio bus (implemented by
the internal VirtIOMMIOProxy device). This bus class
(TYPE_VIRTIO_MMIO_BUS) inherits, as its get_dev_path() member function,
the virtio_bus_get_dev_path() method from its parent class
(TYPE_VIRTIO_BUS).

virtio_bus_get_dev_path() does not format any kind of device address on
its own; "virtio addresses" are transport-specific. Therefore
virtio_bus_get_dev_path() asks the topologically parent bus of the proxy
object (implementing the specific virtio transport) to format the address
of the proxy object.

(For virtio-pci devices (where the proxy is an instance of VirtIOPCIProxy,
plugged into a PCI bus), this ends up in pcibus_get_dev_path().)

However, VirtIOMMIOProxy is usually (in practice: always) plugged into
"main-system-bus", the singleton TYPE_SYSTEM_BUS object. This BusClass
does not support formatting QEMU vmstate device paths at all (as
SysBusDevice objects can have zero or more IO ports and zero or more MMIO
regions). Hence the formatting request delegated from
virtio_bus_get_dev_path() gets answered with NULL.

The end result is that the two scsi-hd devices end up with the same device
path "0:0:0", which triggers the assert.

We can solve this by recognizing that virtio-mmio transports are
distinguished from each other by their base addresses in MMIO address
space. Implement virtio_mmio_bus_get_dev_path() as follows:

(1) The virtio device whose devpath is to be formatted resides on a
    virtio-mmio bus that is implemented by a VirtIOMMIOProxy object. Ask
    the parent bus of VirtIOMMIOProxy to format the device path of
    VirtIOMMIOProxy, as a path prefix. (This is identical to what
    virtio_bus_get_dev_path() does.)

(2) Append the base address of VirtIOMMIOProxy to the device path, such
    as:
    - virtio-mmio@000000000a003e00,
    - virtio-mmio@000000000a003c00.

Given that these device paths are placed in the migration stream, step (2)
above, if done unconditionally, would break migration. So make that step
conditional on a new VirtIOMMIOProxy property, which is enabled for 2.7
machine types and later.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Cole Robinson <crobinso@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Kevin Zhao <kevin.zhao@linaro.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Tom Hanson <thomas.hanson@linaro.org>
Reported-by: Kevin Zhao <kevin.zhao@linaro.org>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1467739394-28357-1-git-send-email-lersek@redhat.com
Fixes: https://bugs.launchpad.net/qemu/+bug/1594239
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:51:36 +01:00
Peter Maydell
1c8e93fb41 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream-fwcfg' into staging
* Updated fw_cfg option ROM to include DMA support

# gpg: Signature made Thu 14 Jul 2016 14:51:06 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream-fwcfg:
  Add optionrom compatible with fw_cfg DMA version

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 16:49:18 +01:00
Peter Maydell
22e28174ae Merge remote-tracking branch 'remotes/xtensa/tags/20160714-xtensa' into staging
Xtensa-related fixes:

- fix FLASH interface width for XTFPGA boards.

# gpg: Signature made Thu 14 Jul 2016 12:00:05 BST
# gpg:                using RSA key 0x51F9CC91F83FA044
# gpg: Good signature from "Max Filippov <max.filippov@cogentembedded.com>"
# gpg:                 aka "Max Filippov <jcmvbkbc@gmail.com>"
# Primary key fingerprint: 2B67 854B 98E5 327D CDEB  17D8 51F9 CC91 F83F A044

* remotes/xtensa/tags/20160714-xtensa:
  target-xtensa: xtfpga: fix FLASH interface width

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 15:57:28 +01:00
Marc Marí
b2a575a1c6 Add optionrom compatible with fw_cfg DMA version
This optionrom is based on linuxboot.S.

Signed-off-by: Marc Marí <markmb@redhat.com>
Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
Message-Id: <1464027093-24073-2-git-send-email-rjones@redhat.com>
[Add -fno-toplevel-reorder, support clang without -m16. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-14 15:50:52 +02:00
Peter Maydell
190c93c982 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* SCSI scanner support
* fixes to qemu-char and net exit
* FreeBSD fixes
* Other small bugfixes

# gpg: Signature made Wed 13 Jul 2016 12:30:11 BST
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  hostmem: detect host backend memory is being used properly
  hostmem: fix QEMU crash by 'info memdev'
  char: do not use atexit cleanup handler
  net: do not use atexit for cleanup
  slirp: use exit notifier for slirp_smb_cleanup
  tap: use an exit notifier to call down_script
  util: Fix MIN_NON_ZERO
  qemu-sockets: use qapi_free_SocketAddress in cleanup
  disas: avoid including everything in headers compiled from C++
  json-streamer: fix double-free on exiting during a parse
  main-loop: check return value before using pointer
  Use "-s" instead of "--quiet" to resolve non-fatal build error on FreeBSD.
  scsi-bus: Use longer sense buffer with scanners
  scsi-bus: Add SCSI scanner support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 13:44:06 +01:00
Max Filippov
f9a555e499 target-xtensa: xtfpga: fix FLASH interface width
FLASH chip on XTFPGA boards is connected with 16-bit-wide interface.
Latest U-Boot can see the difference and does not work correctly with
32-bit-wide interface.
Set FLASH chip 'width' property to 2.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2016-07-14 13:59:44 +03:00
Peter Maydell
9358450e98 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Wed 13 Jul 2016 12:46:17 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (34 commits)
  iotests: Make 157 actually format-agnostic
  vvfat: Fix qcow write target driver specification
  hmp: show all of snapshot info on every block dev in output of 'info snapshots'
  hmp: use snapshot name to determine whether a snapshot is 'fully available'
  qemu-iotests: Test naming of throttling groups
  blockdev: Fix regression with the default naming of throttling groups
  vmdk: fix metadata write regression
  Improve block job rate limiting for small bandwidth values
  qcow2: Fix qcow2_get_cluster_offset()
  qemu-io: Use correct range limitations
  qcow2: Avoid making the L1 table too big
  qemu-img: Use strerror() for generic resize error
  block: Remove BB options from blockdev-add
  qemu-iotests: Test setting WCE with qdev
  block/qdev: Allow configuring rerror/werror with qdev properties
  commit: Fix use of error handling policy
  block/qdev: Allow configuring WCE with qdev properties
  block/qdev: Allow node name for drive properties
  coroutine: move entry argument to qemu_coroutine_create
  test-coroutine: prepare for the next patch
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 11:48:46 +01:00
Peter Maydell
5bb2399f9b Merge remote-tracking branch 'remotes/rth/tags/pull-rth-20160712' into staging
target-sparc improvements, v4

# gpg: Signature made Tue 12 Jul 2016 19:04:33 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-rth-20160712: (24 commits)
  target-sparc: Elide duplicate updates to fprs
  target-sparc: Use cpu_loop_exit_restore from helper_check_ieee_exceptions
  target-sparc: Use cpu_fsr in stfsr
  target-sparc: Use explicit writes to cpu_fsr
  target-sparc: Remove helper_ldf_asi, helper_stf_asi
  target-sparc: Directly implement block and short ldf/stf asis
  target-sparc: Directly implement easy ldf/stf asis
  target-sparc: Pass TCGMemOp constants to helper_ld/st_asi
  target-sparc: Fix obvious error in ASI_M_BFILL
  target-sparc: Directly implement easy ldd/std asis
  target-sparc: Introduce gen_check_align
  target-sparc: Use QT0 to return results from ldda
  target-sparc: Directly implement easy ld/st asis
  target-sparc: Use defines from asi.h
  target-sparc: Add UA2005 defines to asi.h
  target-sparc: Import linux/arch/sparc/include/uapi/asm/asi.h
  target-sparc: Pass TCGMemOp to gen_ld/st_asi
  target-sparc: Introduce get_asi
  target-sparc: Store %asi in TB flags
  target-sparc: Unify asi handling between 32 and 64-bit
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-14 10:36:27 +01:00
Kevin Wolf
543d7a42ba Merge remote-tracking branch 'mreitz/tags/pull-block-for-kevin-2016-07-13' into queue-block
Block patches (v2) for the block queue.

# gpg: Signature made Wed Jul 13 13:41:53 2016 CEST
# gpg:                using RSA key 0x3BB14202E838ACAD
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40
#      Subkey fingerprint: 58B3 81CE 2DC8 9CF9 9730  EE64 3BB1 4202 E838 ACAD

* mreitz/tags/pull-block-for-kevin-2016-07-13:
  iotests: Make 157 actually format-agnostic
  vvfat: Fix qcow write target driver specification
  hmp: show all of snapshot info on every block dev in output of 'info snapshots'
  hmp: use snapshot name to determine whether a snapshot is 'fully available'
  qemu-iotests: Test naming of throttling groups
  blockdev: Fix regression with the default naming of throttling groups
  vmdk: fix metadata write regression
  Improve block job rate limiting for small bandwidth values
  qcow2: Fix qcow2_get_cluster_offset()
  qemu-io: Use correct range limitations
  qcow2: Avoid making the L1 table too big
  qemu-img: Use strerror() for generic resize error

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:45:55 +02:00
Max Reitz
42190dcc70 iotests: Make 157 actually format-agnostic
iotest 157 pretends not to care about the image format used, but in fact
it does due to the format name not being filtered in its output. This
patch adds filtering and changes the reference output accordingly.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20160711132246.3152-1-mreitz@redhat.com
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
Max Reitz
c4b48bfdc5 vvfat: Fix qcow write target driver specification
First, bdrv_open_child() expects all options for the child to be
prefixed by the child's name (and a separating dot). Second,
bdrv_open_child() does not take ownership of the QDict passed to it but
only extracts all options for the child, so if a QDict is created for
the sole purpose of passing it to bdrv_open_child(), it needs to be
freed afterwards.

This patch makes vvfat adhere to both of these rules.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20160711135452.11304-1-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
0c204cc810 hmp: show all of snapshot info on every block dev in output of 'info snapshots'
Currently, the output of 'info snapshots' shows fully available snapshots.
It's opaque, hides some snapshot information to users. It's not convenient
if users want to know more about all of snapshot information on every block
device via monitor.

Follow Kevin's and Max's proposals, The patch makes the output more detailed:
(qemu) info snapshots
List of snapshots present on all disks:
 ID        TAG                 VM SIZE                DATE       VM CLOCK
 --        checkpoint-1           165M 2016-05-22 16:58:07   00:02:06.813

List of partial (non-loadable) snapshots on 'drive_image1':
 ID        TAG                 VM SIZE                DATE       VM CLOCK
 1         snap1                     0 2016-05-22 16:57:31   00:01:30.567

Signed-off-by: Lin Ma <lma@suse.com>
Message-id: 1467869164-26688-3-git-send-email-lma@suse.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
3a1ee71190 hmp: use snapshot name to determine whether a snapshot is 'fully available'
Currently qemu uses snapshot id to determine whether a snapshot is fully
available, It causes incorrect output in some scenario.

For instance:
(qemu) info block
drive_image1 (#block113): /opt/vms/SLES12-SP1-JeOS-x86_64-GM/disk0.qcow2
(qcow2)
    Cache mode:       writeback

drive_image2 (#block349): /opt/vms/SLES12-SP1-JeOS-x86_64-GM/disk1.qcow2
(qcow2)
    Cache mode:       writeback
(qemu)
(qemu) info snapshots
There is no snapshot available.
(qemu)
(qemu) snapshot_blkdev_internal drive_image1 snap1
(qemu)
(qemu) info snapshots
There is no suitable snapshot available
(qemu)
(qemu) savevm checkpoint-1
(qemu)
(qemu) info snapshots
ID        TAG                 VM SIZE                DATE       VM CLOCK
1         snap1                     0 2016-05-22 16:57:31   00:01:30.567
(qemu)

$ qemu-img snapshot -l disk0.qcow2
Snapshot list:
ID        TAG                 VM SIZE                DATE       VM CLOCK
1         snap1                     0 2016-05-22 16:57:31   00:01:30.567
2         checkpoint-1           165M 2016-05-22 16:58:07   00:02:06.813

$ qemu-img snapshot -l disk1.qcow2
Snapshot list:
ID        TAG                 VM SIZE                DATE       VM CLOCK
1         checkpoint-1              0 2016-05-22 16:58:07   00:02:06.813

The patch uses snapshot name instead of snapshot id to determine whether a
snapshot is fully available and uses '--' instead of snapshot id in output
because the snapshot id is not guaranteed to be the same on all images.
For instance:
(qemu) info snapshots
List of snapshots present on all disks:
 ID        TAG                 VM SIZE                DATE       VM CLOCK
 --        checkpoint-1           165M 2016-05-22 16:58:07   00:02:06.813

Signed-off-by: Lin Ma <lma@suse.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 1467869164-26688-2-git-send-email-lma@suse.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
Alberto Garcia
435d5ee6cd qemu-iotests: Test naming of throttling groups
Throttling groups are named using the 'group' parameter of the
block_set_io_throttle command and the throttling.group command-line
option. If that parameter is unspecified the groups get the name of
the block device.

This patch adds a new test to check the naming of throttling groups.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: d87d02823a6b91609509d8bb18e2f5dbd9a6102c.1467986342.git.berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
Alberto Garcia
ff356ee4da blockdev: Fix regression with the default naming of throttling groups
When I/O limits are set for a block device, the name of the throttling
group is taken from the BlockBackend if the user doesn't specify one.

Commit efaa7c4eeb moved the naming of the BlockBackend in
blockdev_init() to the end of the function, after I/O limits are set.
The consequence is that the throttling group gets an empty name.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reported-by: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: qemu-stable@nongnu.org
Message-id: af5cd58bd2c4b9f6c57f260d9cfe586b9fb7d34d.1467986342.git.berto@igalia.com
[mreitz: Use existing "id" variable instead of new "blk_id"]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
Reda Sallahi
524089bce4 vmdk: fix metadata write regression
Commit "cdeaf1f vmdk: add bdrv_co_write_zeroes" causes a regression on
writes. It writes metadata after every write instead of doing it only once
for each cluster.

vmdk_pwritev() writes metadata whenever m_data is set as valid so this patch
sets m_data as valid only when we have a new cluster which hasn't been
allocated before or a zero grain.

Signed-off-by: Reda Sallahi <fullmanet@gmail.com>
Message-id: 20160707084249.29084-1-fullmanet@gmail.com
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:39 +02:00
Sascha Silbe
f14a39ccb9 Improve block job rate limiting for small bandwidth values
ratelimit_calculate_delay() previously reset the accounting every time
slice, no matter how much data had been processed before. This had (at
least) two consequences:

1. The minimum speed is rather large, e.g. 5 MiB/s for commit and stream.

   Not sure if there are real-world use cases where this would be a
   problem. Mirroring and backup over a slow link (e.g. DSL) would
   come to mind, though.

2. Tests for block job operations (e.g. cancel) were rather racy

   All block jobs currently use a time slice of 100ms. That's a
   reasonable value to get smooth output during regular
   operation. However this also meant that the state of block jobs
   changed every 100ms, no matter how low the configured limit was. On
   busy hosts, qemu often transferred additional chunks until the test
   case had a chance to cancel the job.

Fix the block job rate limit code to delay for more than one time
slice to address the above issues. To make it easier to handle
oversized chunks we switch the semantics from returning a delay
_before_ the current request to a delay _after_ the current
request. If necessary, this delay consists of multiple time slice
units.

Since the mirror job sends multiple chunks in one go even if the rate
limit was exceeded in between, we need to keep track of the start of
the current time slice so we can correctly re-compute the delay for
the updated amount of data.

The minimum bandwidth now is 1 data unit per time slice. The block
jobs are currently passing the amount of data transferred in sectors
and using 100ms time slices, so this translates to 5120
bytes/second. With chunk sizes usually being O(512KiB), tests have
plenty of time (O(100s)) to operate on block jobs. The chance of a
race condition now is fairly remote, except possibly on insanely
loaded systems.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Message-id: 1467127721-9564-2-git-send-email-silbe@linux.vnet.ibm.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:38 +02:00
Max Reitz
c834cba905 qcow2: Fix qcow2_get_cluster_offset()
Recently, qcow2_get_cluster_offset() has been changed to work with bytes
instead of sectors. This invalidated some assertions and introduced a
possible integer multiplication overflow.

This could be reproduced using e.g.

$ qemu-img create -f qcow2 -o cluster_size=1M blub.qcow2 8G
Formatting 'foo.qcow2', fmt=qcow2 size=8589934592 encryption=off
cluster_size=1048576 lazy_refcounts=off refcount_bits=16
$ qemu-io -c map blub.qcow2
qemu-io: qemu/block/qcow2-cluster.c:504: qcow2_get_cluster_offset:
Assertion `bytes_needed <= INT_MAX' failed.
[1]    20775 abort (core dumped)  qemu-io -c map foo.qcow2

This patch removes the now wrong assertion, adding comments and more
assertions to prove its correctness (and fixing the overflow which would
become apparent with the original assertion removed).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20160620142623.24471-3-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:38 +02:00
Max Reitz
a367467995 qemu-io: Use correct range limitations
create_iovec() has a comment lamenting the lack of SIZE_T_MAX. Since
there actually is a SIZE_MAX, use it.

Two places use INT_MAX for checking the upper bound of a sector count
that is used as an argument for a blk_*() function (blk_discard() and
blk_write_compressed(), respectively). BDRV_REQUEST_MAX_SECTORS should
be used instead.

And finally, do_co_pwrite_zeroes() used to similarly check that the
sector count does not exceed INT_MAX. However, this function is now
backed by blk_co_pwrite_zeroes() which takes bytes as an argument
instead of sectors. Therefore, it should be the byte count that does not
exceed INT_MAX, not the sector count.

Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:38 +02:00
Max Reitz
84c26520d3 qcow2: Avoid making the L1 table too big
We refuse to open images whose L1 table we deem "too big". Consequently,
we should not produce such images ourselves.

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20160615153630.2116-3-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
[mreitz: Added QEMU_BUILD_BUG_ON()]
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:38 +02:00
Max Reitz
bcf23482ae qemu-img: Use strerror() for generic resize error
Emitting the plain error number is not very helpful. Use strerror()
instead.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20160615153630.2116-2-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:41:38 +02:00
Kevin Wolf
35fedb7b0e block: Remove BB options from blockdev-add
werror/rerror are now available as qdev options. The stats-* options are
removed without an existing replacement; they should probably be
configurable with a separate QMP command like I/O throttling settings.

Removing id is left for another day because this involves updating
qemu-iotests cases to use node-name for everything. Before we can do
that, however, all QMP commands must support node-name.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:32:28 +02:00
Kevin Wolf
62ed9fa991 qemu-iotests: Test setting WCE with qdev
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:32:27 +02:00
Kevin Wolf
8c39825218 block/qdev: Allow configuring rerror/werror with qdev properties
The rerror/werror policies are implemented in the devices, so that's
where they should be configured. In comparison to the old options in
-drive, the qdev properties are only added to those devices that
actually support them.

If the option isn't given (or "auto" is specified), the setting of the
BlockBackend is used for compatibility with the old options. For block
jobs, "auto" is the same as "enospc".

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:32:27 +02:00
Kevin Wolf
1e8fb7f1ee commit: Fix use of error handling policy
Commit implemented the 'enospc' policy as 'ignore' if the error was not
ENOSPC. The QAPI documentation promises that it's treated as 'stop'.
Using the common block job error handling function fixes this and also
adds the missing QMP event.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:32:27 +02:00
Kevin Wolf
f6166a06ff block/qdev: Allow configuring WCE with qdev properties
As cache.writeback is a BlockBackend property and as such more related
to the guest device than the BlockDriverState, we already removed it
from the blockdev-add interface. This patch adds the new way to set it,
as a qdev property of the corresponding guest device.

For example: -drive if=none,file=test.img,node-name=img
             -device ide-hd,drive=img,write-cache=off

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:32:27 +02:00
Xiao Guangrong
2aece63c8a hostmem: detect host backend memory is being used properly
Currently, we use memory_region_is_mapped() to detect if the host
backend memory is being used. This works if the memory is directly
mapped into guest's address space, however, it is not true for
nvdimm as it uses aliased memory region to map the memory. This is
why this bug can happen:
   https://bugzilla.redhat.com/show_bug.cgi?id=1352769

Fix it by introduce a new filed, is_mapped, to HostMemoryBackend,
we set/clear this filed accordingly when the device link/unlink to
host backend memory

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 13:30:04 +02:00
Xiao Guangrong
1454d33f05 hostmem: fix QEMU crash by 'info memdev'
'info memdev' crashes QEMU:
   (qemu) info memdev
   Unexpected error in parse_str() at qapi/string-input-visitor.c:111:
   Parameter 'null' expects an int64 value or range
It is caused by null uint16List is returned if 'host-nodes' is the default
value

Return MAX_NODES under this case to fix this bug

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 13:30:04 +02:00
Marc-André Lureau
aa5cb7f5e8 char: do not use atexit cleanup handler
It turns out qemu is calling exit() in various places from various
threads without taking much care of resources state. The atexit()
cleanup handlers cannot easily destroy resources that are in use (by
the same thread or other).

Since c1111a24a3, TCG arm guests run into the following abort() when
running tests, the chardev mutex is locked during the write, so
qemu_mutex_destroy() returns an error:

 #0  0x00007fffdbb806f5 in raise () at /lib64/libc.so.6
 #1  0x00007fffdbb822fa in abort () at /lib64/libc.so.6
 #2  0x00005555557616fe in error_exit (err=<optimized out>, msg=msg@entry=0x555555c38c30 <__func__.14622> "qemu_mutex_destroy")
     at /home/drjones/code/qemu/util/qemu-thread-posix.c:39
 #3  0x0000555555b0be20 in qemu_mutex_destroy (mutex=mutex@entry=0x5555566aa0e0) at /home/drjones/code/qemu/util/qemu-thread-posix.c:57
 #4  0x00005555558aab00 in qemu_chr_free_common (chr=0x5555566aa0e0) at /home/drjones/code/qemu/qemu-char.c:4029
 #5  0x00005555558b05f9 in qemu_chr_delete (chr=<optimized out>) at /home/drjones/code/qemu/qemu-char.c:4038
 #6  0x00005555558b05f9 in qemu_chr_delete (chr=<optimized out>) at /home/drjones/code/qemu/qemu-char.c:4044
 #7  0x00005555558b062c in qemu_chr_cleanup () at /home/drjones/code/qemu/qemu-char.c:4557
 #8  0x00007fffdbb851e8 in __run_exit_handlers () at /lib64/libc.so.6
 #9  0x00007fffdbb85235 in  () at /lib64/libc.so.6
 #10 0x00005555558d1b39 in testdev_write (testdev=0x5555566aa0a0) at /home/drjones/code/qemu/backends/testdev.c:71
 #11 0x00005555558d1b39 in testdev_write (chr=<optimized out>, buf=0x7fffc343fd9a "", len=0) at /home/drjones/code/qemu/backends/testdev.c:95
 #12 0x00005555558adced in qemu_chr_fe_write (s=0x5555566aa0e0, buf=buf@entry=0x7fffc343fd98 "0q", len=len@entry=2) at /home/drjones/code/qemu/qemu-char.c:282

Instead of using a atexit() handler, only run the chardev cleanup as
initially proposed at the end of main(), where there are less chances
(hic) of conflicts or other races.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reported-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20160704153823.16879-1-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 13:30:00 +02:00
Paolo Bonzini
8caf911d33 net: do not use atexit for cleanup
This will be necessary in the next patch, which stops using atexit for
character devices; without it, vhost-user and the redirector filter
will cause a use-after-free.  Relying on the ordering of atexit calls
is also brittle, even now that both the network and chardev
subsystems are using atexit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 13:30:00 +02:00
Paolo Bonzini
f6c2e66ae8 slirp: use exit notifier for slirp_smb_cleanup
We would like to move back net_cleanup() at the end of main function,
like it used to be until f30dbae63a, but minimum
cleanup is needed regardless at exit() time for slirp's SMB
functionality.  Use an exit notifier to call slirp_smb_cleanup.
If net_cleanup() is called first, then remove the exit notifier as it
will become a dangling pointer otherwise.

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 13:30:00 +02:00
Marc-André Lureau
9e32ff3299 tap: use an exit notifier to call down_script
We would like to move back net_cleanup() at the end of main function,
like it used to be until f30dbae63a, but minimum
tap cleanup is necessary regarless at exit() time. Use an exit notifier
to call TAP down_script. If net_cleanup() is called first, then remove
the exit notifier as it will become a dangling pointer otherwise.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20160711144847.16651-1-marcandre.lureau@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 13:29:56 +02:00
Kevin Wolf
8daea51095 block/qdev: Allow node name for drive properties
If a node name instead of a BlockBackend name is specified as the driver
for a guest device, an anonymous BlockBackend is created now.

The order of operations in release_drive() must be reversed in order to
avoid a use-after-free bug because now blk_detach_dev() frees the last
reference if an anonymous BlockBackend is used.

usb-storage uses a hack where it forwards its BlockBackend as a property
to another device that it internally creates. This hack must be updated
so that it doesn't drop its original BB before it can be passed to the
other device. This used to work because we always had the monitor
reference around, but with node-names the device reference is the only
one now.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2016-07-13 13:28:00 +02:00
Paolo Bonzini
0b8b8753e4 coroutine: move entry argument to qemu_coroutine_create
In practice the entry argument is always known at creation time, and
it is confusing that sometimes qemu_coroutine_enter is used with a
non-NULL argument to re-enter a coroutine (this happens in
block/sheepdog.c and tests/test-coroutine.c).  So pass the opaque value
at creation time, for consistency with e.g. aio_bh_new.

Mostly done with the following semantic patch:

@ entry1 @
expression entry, arg, co;
@@
- co = qemu_coroutine_create(entry);
+ co = qemu_coroutine_create(entry, arg);
  ...
- qemu_coroutine_enter(co, arg);
+ qemu_coroutine_enter(co);

@ entry2 @
expression entry, arg;
identifier co;
@@
- Coroutine *co = qemu_coroutine_create(entry);
+ Coroutine *co = qemu_coroutine_create(entry, arg);
  ...
- qemu_coroutine_enter(co, arg);
+ qemu_coroutine_enter(co);

@ entry3 @
expression entry, arg;
@@
- qemu_coroutine_enter(qemu_coroutine_create(entry), arg);
+ qemu_coroutine_enter(qemu_coroutine_create(entry, arg));

@ reentry @
expression co;
@@
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);

except for the aforementioned few places where the semantic patch
stumbled (as expected) and for test_co_queue, which would otherwise
produce an uninitialized variable warning.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Paolo Bonzini
7e70cdba9f test-coroutine: prepare for the next patch
The next patch moves the coroutine argument from first-enter to
creation time.  In this case, coroutine has not been initialized
yet when the coroutine is created, so change to a pointer.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Paolo Bonzini
7d9c858137 coroutine: use QSIMPLEQ instead of QTAILQ
CoQueue do not need to remove any element but the head of the list;
processing is always strictly FIFO.  Therefore, the simpler singly-linked
QSIMPLEQ can be used instead.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Fam Zheng
5af7045bd0 raw-posix: Use qemu_dup
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Fam Zheng
761d1ddf25 osdep: Introduce qemu_dup
And use it in qemu_dup_flags.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
6aae5be6a7 blockjob: Update description of the 'device' field in the QMP API
The 'device' field in all BLOCK_JOB_* events and 'block-job-*' command
is no longer the device name, but the ID of the job. This patch
updates the documentation to clarify that.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
a5d5a3bdbd qemu-img: Set the ID of the block job in img_commit()
img_commit() creates a block job without an ID. This is no longer
allowed now that we require it to be unique and well-formed. We were
solving this by having a fallback in block_job_create(), but now that
we extended the API of commit_active_start() we can finally set an
explicit ID and revert that change.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
fd62c609ed commit: Add 'job-id' parameter to 'block-commit'
This patch adds a new optional 'job-id' parameter to 'block-commit',
allowing the user to specify the ID of the block job to be created.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
2323322ed0 stream: Add 'job-id' parameter to 'block-stream'
This patch adds a new optional 'job-id' parameter to 'block-stream',
allowing the user to specify the ID of the block job to be created.

The HMP 'block_stream' command remains unchanged.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
70559d499c backup: Add 'job-id' parameter to 'blockdev-backup' and 'drive-backup'
This patch adds a new optional 'job-id' parameter to 'blockdev-backup'
and 'drive-backup', allowing the user to specify the ID of the block
job to be created.

The HMP 'drive_backup' command remains unchanged.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
71aa98678c mirror: Add 'job-id' parameter to 'blockdev-mirror' and 'drive-mirror'
This patch adds a new optional 'job-id' parameter to 'blockdev-mirror'
and 'drive-mirror', allowing the user to specify the ID of the block
job to be created.

The HMP 'drive_mirror' command remains unchanged.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
7f0317cfc8 blockjob: Add 'job_id' parameter to block_job_create()
When a new job is created, the job ID is taken from the device name of
the BDS. This patch adds a new 'job_id' parameter to let the caller
provide one instead.

This patch also verifies that the ID is always unique and well-formed.
This causes problems in a couple of places where no ID is being set,
because the BDS does not have a device name.

In the case of test_block_job_start() (from test-blockjob-txn.c) we
can simply use this new 'job_id' parameter to set the missing ID.

In the case of img_commit() (from qemu-img.c) we still don't have the
API to make commit_active_start() set the job ID, so we solve it by
setting a default value. We'll get rid of this as soon as we extend
the API.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
3ddf3efefa block: Use block_job_get() in find_block_job()
find_block_job() looks for a block backend with a specified name,
checks whether it has a block job and acquires its AioContext.

We want to identify jobs by their ID and not by the block backend
they're attached to, so this patch ignores the backends altogether and
gets the job directly. Apart from making the code simpler, this will
allow us to find block jobs once they start having user-specified IDs.

To ensure backward compatibility we keep ERROR_CLASS_DEVICE_NOT_ACTIVE
as the error class if the job doesn't exist. In subsequent patches
we'll also need to keep the device name as the default job ID if the
user doesn't specify a different one.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
ffb1f10cd1 blockjob: Add block_job_get()
Currently the way to look for a specific block job is to iterate the
list manually using block_job_next().

Since we want to be able to identify a job primarily by its ID it
makes sense to have a function that does just that.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
9df229c3ca blockjob: Update description of the 'id' field
The 'id' field of the BlockJob structure will be able to hold any ID,
not only a device name. This patch updates the description of that
field and the error messages where it is being used.

Soon we'll add the ability to set an arbitrary ID when creating a
block job.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Alberto Garcia
29338003c9 stream: Fix prototype of stream_start()
'stream-start' has a parameter called 'backing-file', which is the
string to be written to bs->backing when the job finishes.

In the stream_start() implementation it is called 'backing_file_str',
but it the prototype in the header file it is called 'base_id'.

This patch fixes it so the name is the same in both cases and is
consistent with other cases (like commit_start()).

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-13 13:26:02 +02:00
Fam Zheng
d27ba624aa util: Fix MIN_NON_ZERO
MIN_NON_ZERO(1, 0) is evaluated to 0. Rewrite the macro to fix it.

Reported-by: Miroslav Rezanina <mrezanin@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <1468306113-847-1-git-send-email-famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-13 12:55:11 +02:00
Peter Maydell
9ec3025660 Merge remote-tracking branch 'remotes/mcayland/tags/qemu-openbios-signed' into staging
OpenBIOS: switch over to official OpenBIOS git repo

# gpg: Signature made Tue 12 Jul 2016 19:09:57 BST
# gpg:                using RSA key 0x5BC2C56FAE0F321F
# gpg: Good signature from "Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>"
# Primary key fingerprint: CC62 1AB9 8E82 200D 915C  C9C4 5BC2 C56F AE0F 321F

* remotes/mcayland/tags/qemu-openbios-signed:
  OpenBIOS: switch over to official OpenBIOS git repo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-13 11:15:35 +01:00
Mark Cave-Ayland
fa3007e7ec OpenBIOS: switch over to official OpenBIOS git repo
This update should preserve git history, and switches git.qemu-project.org
over to be a mirror of the new official git repo hosted at
https://github.com/openbios from a git-svn import of the old coreboot SVN
repository. All prior history from the SVN repository should still be preserved
(i.e. commit hashes are the same for historical commits).

No other source changes are made by this commit since both the old and new
HEADs contain the same source tree (albeit with difference metadata) whilst the
previous git-svn HEAD can be retrieved via the svn-head branch.

Proposed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2016-07-12 19:07:56 +01:00
Richard Henderson
f9c816c00c target-sparc: Elide duplicate updates to fprs
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:03:01 -07:00
Richard Henderson
02c79d7885 target-sparc: Use cpu_loop_exit_restore from helper_check_ieee_exceptions
This avoids needing to save state before every FP operation.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:58 -07:00
Richard Henderson
ba2397d1ca target-sparc: Use cpu_fsr in stfsr
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:56 -07:00
Richard Henderson
7385aed20d target-sparc: Use explicit writes to cpu_fsr
By arranging for explicit writes to cpu_fsr after floating point
operations, we are able to mark the helpers as not writing to
tcg globals, which means that we don't need to invalidate the
integer register set across said calls.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:52 -07:00
Richard Henderson
f2fe396f0f target-sparc: Remove helper_ldf_asi, helper_stf_asi
We've now implemented all fp asis inline, except for the no-fault
memory reads.  The latter can be passed directly to helper_ld_asi.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:48 -07:00
Richard Henderson
ca5ce5723f target-sparc: Directly implement block and short ldf/stf asis
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:45 -07:00
Richard Henderson
7705091ca4 target-sparc: Directly implement easy ldf/stf asis
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:43 -07:00
Richard Henderson
6850811e7c target-sparc: Pass TCGMemOp constants to helper_ld/st_asi
Reduces the argument count for helper_ld_asi; do helper_st_asi
for consistency.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:40 -07:00
Richard Henderson
c095b83f98 target-sparc: Fix obvious error in ASI_M_BFILL
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:37 -07:00
Richard Henderson
e4dc0052a4 target-sparc: Directly implement easy ldd/std asis
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:35 -07:00
Richard Henderson
35e94905ce target-sparc: Introduce gen_check_align
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:32 -07:00
Richard Henderson
3f4288ebf6 target-sparc: Use QT0 to return results from ldda
Also implement a few more twinx asis.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:29 -07:00
Richard Henderson
f0913be04b target-sparc: Directly implement easy ld/st asis
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:27 -07:00
Richard Henderson
0cc1f4bf76 target-sparc: Use defines from asi.h
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:24 -07:00
Richard Henderson
1d854963ea target-sparc: Add UA2005 defines to asi.h
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:22 -07:00
Richard Henderson
68a03b8c88 target-sparc: Import linux/arch/sparc/include/uapi/asm/asi.h
Copied from tag v4.2, 64291f7db5bd8150a74ad2036f1037e6a0428df2.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:19 -07:00
Richard Henderson
1d65b0f5bb target-sparc: Pass TCGMemOp to gen_ld/st_asi
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:16 -07:00
Richard Henderson
7ec1e5ea4b target-sparc: Introduce get_asi
Replace gen_get_asi, and use it for both 32-bit and 64-bit.
For v8, do supervisor and immediate checks here.

Also, move save_state and TB ending into the respective
subroutines, out of disas_sparc_insn.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:11 -07:00
Richard Henderson
a6d567e523 target-sparc: Store %asi in TB flags
Knowing the value of %asi at translation time means that we
can handle the common settings without a function call.

The steady state appears to be %asi == ASI_P, so that sparcv9
code can use offset forms of lda/sta.  The %asi register gets
pushed and popped on entry to certain functions, but it rarely
takes on values other than ASI_P or ASI_AIUP.  Therefore we're
unlikely to be expanding the set of TBs created.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:06 -07:00
Richard Henderson
22e700607a target-sparc: Unify asi handling between 32 and 64-bit
We now have a single copy of gen_ld_asi, gen_st_asi,
gen_swap_asi, and everything uses gen_get_asi.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:03 -07:00
Richard Henderson
4fbe006790 target-sparc: Create gen_exception
This unifies quite a few duplicate code fragments.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:02:00 -07:00
Richard Henderson
99a230638a target-sparc: Store mmu index in TB flags
Doing this instead of saving the raw PS_PRIV and TL.  This means
that all nucleus mode TBs (TL > 0) can be shared.  This fixes a
bug in that we didn't include HS_PRIV in the TB flags, and so could
produce incorrect TB matches for hypervisor state.

The LSU and DMMU states were unused by the translator.  Including
them in TB flags meant unnecessary mismatches from tb_find_fast.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:01:55 -07:00
Richard Henderson
e86ceb0d65 target-sparc: Remove softint as a TCG global
The global is only ever read for one insn; we can just as well
use a load from env instead and generate the same code.  This
also allows us to indicate the the associated helpers do not
touch TCG globals.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:01:51 -07:00
Richard Henderson
be72f9fcca target-sparc: Mark more flags for helpers
Quite a few helpers do not modify tcg globals but did not so indicate.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-12 11:01:39 -07:00
Marc-André Lureau
73f40c1895 qemu-sockets: use qapi_free_SocketAddress in cleanup
Commit 74b6ce43e3 uses the wrong free API for a SocketAddress, that
may leak some linked data.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20160706164246.22116-1-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:27 +02:00
Paolo Bonzini
1e13c01d2a disas: avoid including everything in headers compiled from C++
disas/arm-a64.cc is careful to include only the bare minimum that
it needs---qemu/osdep.h and disas/bfd.h.  Unfortunately, disas/bfd.h
then includes qemu-common.h, which brings in qemu/option.h and from
there we get the kitchen sink.

This causes problems because for example QEMU's atomic macros
conflict with C++ atomic types.  But really all that bfd.h needs
is the fprintf_function typedef, so replace the inclusion of
qemu-common.h with qemu/fprintf-fn.h.

Reported-by: Sean Bruno <sbruno@freebsd.org>
Tested-by: Sean Bruno <sbruno@freebsd.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:27 +02:00
Paolo Bonzini
a942d8fa01 json-streamer: fix double-free on exiting during a parse
Now that json-streamer tries not to leak tokens on incomplete parse,
the tokens can be freed twice if QEMU destroys the json-streamer
object during the parser->emit call.  To fix this, create the new
empty GQueue earlier, so that it is already in place when the old
one is passed to parser->emit.

Reported-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1467636059-12557-1-git-send-email-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:27 +02:00
Cao jin
28ba61e7ff main-loop: check return value before using pointer
pointer 'qemu_aio_context' should be checked first before it is used.
qemu_bh_new() will use it.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Message-Id: <1467799740-26079-2-git-send-email-caoj.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:27 +02:00
Sean Bruno
540aecd099 Use "-s" instead of "--quiet" to resolve non-fatal build error on FreeBSD.
The --quiet argument is not available on all operating systems.  Use -s
instead to match the rest of the Makefile uses.  This fixes a non-fatal
error seen on FreeBSD.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Sean Bruno <sbruno@freebsd.org>
Message-Id: <20160614180734.8782-1-sbruno@freebsd.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:26 +02:00
Jarkko Lavinen
6959e508c6 scsi-bus: Use longer sense buffer with scanners
Scanners can provide additional sense bytes beyond 18 bytes.
VueScan uses 32 bytes alloc length with Request Sense command.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@iki.fi>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:26 +02:00
Jarkko Lavinen
297b044a7f scsi-bus: Add SCSI scanner support
Add support for missing scanner specific SCSI commands and their xfer
lenghts as per ANSI spec section 15.

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@iki.fi>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-12 18:31:26 +02:00
Peter Maydell
ca3d87d4c8 Merge remote-tracking branch 'remotes/armbru/tags/pull-include-2016-07-12' into staging
Clean up #include "..." vs <...> and header guards

# gpg: Signature made Tue 12 Jul 2016 15:23:43 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-include-2016-07-12:
  cris: Fix broken header guard in hw/cris/boot.h
  Clean up decorations and whitespace around header guards
  Clean up ill-advised or unusual header guards
  libdecnumber: Don't error out on decNumberLocal.h re-inclusion
  libdecnumber: Don't fool around with guards to avoid #include
  Clean up header guards that don't match their file name
  Drop Emacs local variables lists redundant with .dir-locals.el
  spapr_pci: Include spapr.h instead of playing games with #error
  tcg: Clean up tcg-target.h header guards
  linux-user: Fix broken header guard in syscall_defs.h
  linux-user: Clean up hostdep.h header guards
  linux-user: Clean up target_structs.h header guards
  linux-user: Clean up target_signal.h header guards
  linux-user: Clean up target_cpu.h header guards
  linux-user: Clean up target_syscall.h header guards
  target-*: Clean up cpu.h header guards
  scripts: New clean-header-guards.pl
  Use #include "..." for our own headers, <...> for others

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-12 16:04:36 +01:00
Markus Armbruster
82751a32be cris: Fix broken header guard in hw/cris/boot.h
Found with scripts/clean-header-guards.pl.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:20:46 +02:00
Markus Armbruster
175de52487 Clean up decorations and whitespace around header guards
Cleaned up with scripts/clean-header-guards.pl.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:20:46 +02:00
Markus Armbruster
2a6a4076e1 Clean up ill-advised or unusual header guards
Cleaned up with scripts/clean-header-guards.pl.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:20:46 +02:00
Markus Armbruster
965379b455 libdecnumber: Don't error out on decNumberLocal.h re-inclusion
decNumberLocal.h errors out when it's included with its header guard
defined.  This catches multiple inclusions.

Drop that.  Including it multiple times is safe, and the compiler can
do it efficiently.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
6031a51f1d libdecnumber: Don't fool around with guards to avoid #include
Some libdecnumber headers avoid including decNumber.h or decContext.h
again by checking their header guards.  Don't.  Including them
multiple times is safe, and the compiler can do it efficiently.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
121d07125b Clean up header guards that don't match their file name
Header guard symbols should match their file name to make guard
collisions less likely.  Offenders found with
scripts/clean-header-guards.pl -vn.

Cleaned up with scripts/clean-header-guards.pl, followed by some
renaming of new guard symbols picked by the script to better ones.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
85aad98a0e Drop Emacs local variables lists redundant with .dir-locals.el
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
20668fdebd spapr_pci: Include spapr.h instead of playing games with #error
include/hw/pci-host/spapr.h needs hw/ppc/spapr.h.  It checks whether
its header guard is defined, and errors out if it isn't.

Playing games with some other header's guard symbol is not a good
idea.  Just include the frackin' header already.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
14e54f8ecf tcg: Clean up tcg-target.h header guards
These use guard symbols like TCG_TARGET_$target.
scripts/clean-header-guards.pl doesn't like them because they don't
match their file name (they should, to make guard collisions less
likely).

Clean them up: use guard symbol $target_TCG_TARGET_H for
tcg/$target/tcg-target.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
1b3c4fdf30 linux-user: Fix broken header guard in syscall_defs.h
Found with scripts/clean-header-guards.pl.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
59e96bcbf9 linux-user: Clean up hostdep.h header guards
These headers all use QEMU_HOSTDEP_H as header guard symbol.  Reuse of
the same guard symbol in multiple headers is okay as long as they
cannot be included together.

Since we can avoid guard symbol reuse easily, do so: use guard symbol
$target_HOSTDEP_H for linux-user/host/$target/hostdep.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
3500385697 linux-user: Clean up target_structs.h header guards
These headers all use TARGET_STRUCTS_H as header guard symbol.  Reuse
of the same guard symbol in multiple headers is okay as long as they
cannot be included together.

Since we can avoid guard symbol reuse easily, do so: use guard symbol
$target_TARGET_STRUCTS_H for linux-user/$target/target_structs.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
9c93ae13a4 linux-user: Clean up target_signal.h header guards
These headers all use TARGET_SIGNAL_H as header guard symbol.  Reuse
of the same guard symbol in multiple headers is okay as long as they
cannot be included together.

Since we can avoid guard symbol reuse easily, do so: use guard symbol
$target_TARGET_SIGNAL_H for linux-user/$target/target_signal.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
55c5063c61 linux-user: Clean up target_cpu.h header guards
These headers all use TARGET_CPU_H as header guard symbol.  Reuse of
the same guard symbol in multiple headers is okay as long as they
cannot be included together.

Since we can avoid guard symbol reuse easily, do so: use guard symbol
$target_TARGET_CPU_H for linux-user/$target/target_cpu.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
3622634bc6 linux-user: Clean up target_syscall.h header guards
Some of them use guard symbol TARGET_SYSCALL_H, but we also have
CRIS_SYSCALL_H, MICROBLAZE_SYSCALLS_H, TILEGX_SYSCALLS_H and
__UC32_SYSCALL_H__.  They all upset scripts/clean-header-guards.pl.

Reuse of the same guard symbol TARGET_SYSCALL_H in multiple headers is
okay as long as they cannot be included together.  The script can't
tell, so it warns.

The script dislikes the other guard symbols, too.  They don't match
their file name (they should, to make guard collisions less likely),
and __UC32_SYSCALL_H__ is a reserved identifier.

Clean them all up: use guard symbol $target_TARGET_SYSCALL_H for
linux-user/$target/target_sycall.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
07f5a25875 target-*: Clean up cpu.h header guards
Most of them use guard symbols like CPU_$target_H, but we also have
__MIPS_CPU_H__ and __TRICORE_CPU_H__.  They all upset
scripts/clean-header-guards.pl.

The script dislikes CPU_$target_H because they don't match their file
name (they should, to make guard collisions less likely).  The others
are reserved identifiers.

Clean them all up: use guard symbol $target_CPU_H for
target-$target/cpu.h.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
2dbc4ebc17 scripts: New clean-header-guards.pl
The conventional way to ensure a header can be included multiple times
is to bracket it like this:

    #ifndef HEADER_NAME_H
    #define HEADER_NAME_H
    ...
    #endif

where HEADER_NAME_H is a symbol unique to this header.

The endif may be optionally decorated like this:

    #endif /* HEADER_NAME_H */

Unconventional ways present in our code:

* Identifiers reserved for any use:
    #define _FILEOP_H

* Lowercase (bad idea for object-like macros):
    #define __linux_video_vga_h__

* Roundabout ways to say the same thing (and hide from grep):
    #if !defined(__PPC_MAC_H__)
    #endif /* !defined(__PPC_MAC_H__) */

* Redundant values:
    #define HW_ALPHA_H 1

* Funny redundant values:
    # define PXA_H                 "pxa.h"

* Decorations with bangs:

    #endif /* !QEMU_ARM_GIC_INTERNAL_H */

  The negation actually makes sense, but almost all our header guard
  #endif decorations don't negate.

* Useless decorations:

   #endif  /* audio.h */

Header guards are not the place to show off creativity.  This script
normalizes them to the conventional way, and cleans up whitespace
while there.  It warns when it renames guard symbols, and explains how
to find occurences of these symbols that may have to be updated
manually.

Another issue is use of the same guard symbol in multiple headers.
That's okay only for headers that cannot be used together, such as the
*-user/*/target_syscall.h.  This script can't tell, so it warns when
it sees a reuse.

The script also warns when preprocessing a header with its guard
symbol defined produces anything but whitespace.

The next commits will put the script to use.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Markus Armbruster
a9c94277f0 Use #include "..." for our own headers, <...> for others
Tracked down with an ugly, brittle and probably buggy Perl script.

Also move includes converted to <...> up so they get included before
ours where that's obviously okay.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2016-07-12 16:19:16 +02:00
Peter Maydell
7d820b766a bswap.h: Document cpu_to_* and *_to_cpu conversion functions
Add a documentation comment describing the functions for
converting between the cpu and little or bigendian formats.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1467908460-27048-6-git-send-email-peter.maydell@linaro.org
2016-07-12 15:08:53 +01:00
Peter Maydell
cbe967f41d bswap.h: Fix comment typo
Fix a typo in a comment.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefan Weil <sw@weilnetz.de>
Message-id: 1467908460-27048-5-git-send-email-peter.maydell@linaro.org
2016-07-12 15:08:53 +01:00
Peter Maydell
f76bde7029 bswap.h: Remove unused cpu_to_*w() and *_to_cpup()
Now that all uses of cpu_to_*w() and *_to_cpup() have been replaced
with either ld*_p()/st*_p() or by doing direct dereferences and
using the cpu_to_*()/*_to_cpu() byteswap functions, we can remove
the unused implementations.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1467908460-27048-4-git-send-email-peter.maydell@linaro.org
2016-07-12 15:08:53 +01:00
Peter Maydell
43120576cb hw/bt: Don't use cpu_to_*w() and *_to_cpup()
Don't use cpu_to_*w() and *_to_cpup() to do byte-swapped loads
and stores; instead use ld*_p() and st*_p() which correctly handle
misaligned accesses.

Bring the HNDL() macro into line with how we deal with
PARAMHANDLE(), by using cpu_to_le16() rather than an ifdef
HOST_WORDS_BIGENDIAN.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1467908460-27048-3-git-send-email-peter.maydell@linaro.org
2016-07-12 15:08:53 +01:00
Peter Maydell
b442642da1 fsdev/9p-iov-marshal.c: Don't use cpu_to_*w() functions
Don't use the cpu_to_*w() functions, which we are trying to deprecate.
Instead just use cpu_to_*() to do the byteswap, which brings the
code in the marshal function in line with that in the unmarshal.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1467908460-27048-2-git-send-email-peter.maydell@linaro.org
2016-07-12 15:08:53 +01:00
Sergey Sorokin
b35399bb4e Fix confusing argument names in some common functions
There are functions tlb_fill(), cpu_unaligned_access() and
do_unaligned_access() that are called with access type and mmu index
arguments. But these arguments are named 'is_write' and 'is_user' in their
declarations. The patches fix the arguments to avoid a confusion.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-id: 1465907177-1399402-1-git-send-email-afarallax@yandex.ru
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-12 13:06:08 +01:00
Peter Maydell
74e1b782b3 Merge remote-tracking branch 'remotes/lalrae/tags/mips-20160712' into staging
MIPS patches 2016-07-12

Changes:
* support 10-bit ASIDs
* MIPS64R6-generic renamed to I6400
* initial GIC support
* implement RESET_BASE register in CM GCR

# gpg: Signature made Tue 12 Jul 2016 11:49:50 BST
# gpg:                using RSA key 0x52118E3C0B29DA6B
# gpg: Good signature from "Leon Alrae <leon.alrae@imgtec.com>"
# Primary key fingerprint: 8DD3 2F98 5495 9D66 35D4  4FC0 5211 8E3C 0B29 DA6B

* remotes/lalrae/tags/mips-20160712:
  target-mips: enable 10-bit ASIDs in I6400 CPU
  target-mips: support CP0.Config4.AE bit
  target-mips: change ASID type to hold more than 8 bits
  target-mips: add ASID mask field and replace magic values
  target-mips: replace MIPS64R6-generic with the real I6400 CPU model
  hw/mips_cmgcr: implement RESET_BASE register in CM GCR
  hw/mips_cpc: make VP correctly start from the reset vector
  target-mips: add exception base to MIPS CPU
  hw/mips/cps: create GIC block inside CPS
  hw/mips: implement Global Interrupt Controller
  hw/mips: implement GIC Interval Timer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-12 12:34:41 +01:00
Peter Maydell
c1ac514a04 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160712-1' into staging
usb: misc fixes.

# gpg: Signature made Tue 12 Jul 2016 09:47:21 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20160712-1:
  xen-usb: Fix 32bit build
  usb: add storage hotplug documentation
  nec-usb-xhci: set the device state to USB_STATE_DEFAULT

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-12 12:03:29 +01:00
Peter Maydell
494edbf0b4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20160712-1' into staging
msmouse: fix misc issues, switch to new input interface.
input: add trace events for full queues.
input-linux: better capability checks and event handling.

# gpg: Signature made Tue 12 Jul 2016 09:20:36 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-input-20160712-1:
  input-linux: better capability checks, merge input_linux_event_{mouse, keyboard}
  input-linux: factor out input_linux_handle_keyboard
  input-linux: factor out input_linux_handle_mouse
  input: add trace events for full queues
  msmouse: send short messages if possible.
  msmouse: switch to new input interface
  msmouse: fix buffer handling
  msmouse: add MouseState, unregister handler on close

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-12 10:58:14 +01:00
Peter Maydell
910789c220 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vnc-20160712-1' into staging
vnc: misc bugfixes.

# gpg: Signature made Tue 12 Jul 2016 08:22:40 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-vnc-20160712-1:
  ui: avoid crash if vnc client disconnects with writes pending
  vnc-enc-tight: use thread local storage for palette
  vnc: fix incorrect checking condition when updating client

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-12 09:49:04 +01:00
Anthony PERARD
042ec47e68 xen-usb: Fix 32bit build
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Message-id: 20160623110829.22671-1-anthony.perard@citrix.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-12 10:47:03 +02:00
Gerd Hoffmann
b91e013982 usb: add storage hotplug documentation
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466667901-1341-1-git-send-email-kraxel@redhat.com
2016-07-12 10:25:30 +02:00
Zhang Shuaiyi
a4055d8586 nec-usb-xhci: set the device state to USB_STATE_DEFAULT
This patch is a rough fix to "hw/usb/core.c:401: usb_handle_packet:
 Assertion `dev->state == 3' failed.". Qemu will crash when a usb3
device redirect to Windows7 VM via nec-usb-xhci.

In extensible-host-controler-interface-usb-xhci.pdf P94(4.6.5
Address Device):
    • If the Block Set Address Request (BSR) flag = ‘1’
        • If the slot is in the Enabled state:
            ...
            • Set the Slot State in the Output Slot Context to Default.

BSR = ‘1’: Enabled state to Default state; BSR = ‘0’: Default state
to Addressed state. Try to call usb_device_reset to set device state
to USB_STATE_DEFAULT in xhci_address_slot wether bsr is zero.

Signed-off-by: Zhang Shuaiyi <zhang_syi@massclouds.com>
Message-id: 1467258640-11921-1-git-send-email-zhang_syi@massclouds.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-12 10:23:59 +02:00
Leon Alrae
cdc46fab07 target-mips: enable 10-bit ASIDs in I6400 CPU
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:21 +01:00
Paul Burton
a0c8060841 target-mips: support CP0.Config4.AE bit
The read-only Config4.AE bit set denotes extended 10 bits ASID.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:20 +01:00
Paul Burton
2d72e7b047 target-mips: change ASID type to hold more than 8 bits
ASID currently has uint8_t type which is too small since some processors
support more than 8 bits ASID. Therefore change its type to uint16_t.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:19 +01:00
Paul Burton
6ec98bd7b6 target-mips: add ASID mask field and replace magic values
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:18 +01:00
Leon Alrae
8f95ad1c79 target-mips: replace MIPS64R6-generic with the real I6400 CPU model
MIPS64R6-generic gradually gets closer to I6400 CPU, feature-wise. Rename
it to make it clear which MIPS processor it is supposed to emulate.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:17 +01:00
Leon Alrae
c09199fe73 hw/mips_cmgcr: implement RESET_BASE register in CM GCR
Implement RESET_BASE register which is local to each VP and a write to
it changes VP's reset exception base. Also, add OTHER register to
allow a software running on one VP to access other VP's local registers.

Guest can use this mechanism to specify custom address from which a VP
will start execution.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:16 +01:00
Leon Alrae
dff94251f0 hw/mips_cpc: make VP correctly start from the reset vector
When VP enters the Run state it starts execution from the reset vector.
Currently used CPU_INTERRUPT_WAKE does not do that if reset exception
base has been modified. Therefore fix that by simply resetting given VP.

Drop the usage of CPU_INTERRUPT_WAKE also in VP_STOP and instead raise
the CPU_INTERRUPT_HALT to halt a VP.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:15 +01:00
Leon Alrae
89777fd10f target-mips: add exception base to MIPS CPU
Replace hardcoded 0xbfc00000 with exception_base which is initialized with
this default address so there is no functional change here.
However, it is now exposed and consequently it will be possible to modify
it from outside of the CPU.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:14 +01:00
Leon Alrae
19494f811a hw/mips/cps: create GIC block inside CPS
Add GIC to CPS and expose its interrupt pins instead of CPU's.

Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:13 +01:00
Yongbok Kim
e8bd336dd1 hw/mips: implement Global Interrupt Controller
The Global Interrupt Controller (GIC) is responsible for mapping each
internal and external interrupt to the correct location for servicing.

The internal representation of registers is different from the specification
in order to consolidate information for each GIC Interrupt Sources and Virtual
Processors with same functionalities. For example SH_MAP00_VP00 registers are
defined like each bit represents a VP but in this implementation the equivalent
map_vp contains VP number in integer form for ease accesses. When it is being
accessed via read write functions an internal data is converted back into the
original format as the specification.

Limitations:
Level triggering only
GIC CounterHi not implemented (Countbits = 32bits)
DINT not implemented
Local WatchDog, Fast Debug Channel, Perf Counter not implemented

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:12 +01:00
Yongbok Kim
405140519f hw/mips: implement GIC Interval Timer
The interval timer is similar to the CP0 Count/Compare timer within
each processor. The difference is the GIC_SH_COUNTER register is global
to the system so that all processors have the same time reference.

To ease implementation, all VPs are having its own QEMU timer but sharing
global settings and registers such as GIC_SH_CONFIG.COUTNSTOP and
GIC_SH_COUNTER.

MIPS GIC Interval Timer does support upto 64 bits of Count register but
in this implementation it is limited to 32 bits only.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
2016-07-12 09:10:09 +01:00
Gerd Hoffmann
2e6a64cb8d input-linux: better capability checks, merge input_linux_event_{mouse, keyboard}
Improve capability checks (count keys and buttons), store results.

Merge the input_linux_event_mouse and input_linux_event_keyboard
functions into one, dispatch into input_linux_handle_mouse and
input_linux_handle_keyboard depending on device capabilities.

Allow calling both handle functions, so we can handle mice which
also send key events, by routing those key events to the keyboard.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466067800-25434-4-git-send-email-kraxel@redhat.com
2016-07-12 09:25:50 +02:00
Gerd Hoffmann
2330e9e7cc input-linux: factor out input_linux_handle_keyboard
No functional change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466067800-25434-3-git-send-email-kraxel@redhat.com
2016-07-12 09:25:50 +02:00
Gerd Hoffmann
d4df42c431 input-linux: factor out input_linux_handle_mouse
No functional change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466067800-25434-2-git-send-email-kraxel@redhat.com
2016-07-12 09:25:50 +02:00
Gerd Hoffmann
c80276b420 input: add trace events for full queues
It isn't unusual to happen, for example during reboot when the guest
doesn't reveice events for a while.  So better don't flood stderr
with alarming messages.  Turn them into tracepoints instead so they
can be enabled in case they are needed for trouble-shooting.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466675495-28797-1-git-send-email-kraxel@redhat.com
2016-07-12 09:25:28 +02:00
Gerd Hoffmann
d7b7f526b1 msmouse: send short messages if possible.
Keep track of button changes.  Send the extended 4-byte messages for
three button mice only in case we have something to report for the
middle button.  Use the short 3-byte messages (original protocol for
two-button microsoft mouse) otherwise.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1467625375-31774-5-git-send-email-kraxel@redhat.com
2016-07-12 09:24:31 +02:00
Gerd Hoffmann
96d7c0720e msmouse: switch to new input interface
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1467625375-31774-4-git-send-email-kraxel@redhat.com
2016-07-12 09:24:31 +02:00
Gerd Hoffmann
57a4e3b92b msmouse: fix buffer handling
The msmouse chardev backend writes data without checking whenever there
is enough space.

That happens to work with linux guests, probably by pure luck because
the linux driver enables the fifo and the serial port emulation accepts
more data than announced via qemu_chr_be_can_write() in that case.

Handle this properly by adding a buffer to MouseState.  Hook up a
CharDriverState->accept_input() handler which feeds the buffer to the
serial port.  msmouse_event() only fills the buffer now, and calls the
accept_input handler too to kick off the transmission.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1467625375-31774-3-git-send-email-kraxel@redhat.com
2016-07-12 09:24:31 +02:00
Gerd Hoffmann
cde8dcbc92 msmouse: add MouseState, unregister handler on close
Add struct to track serial mouse state.  Store mouse event handler
there.  Unregister properly on chardev close.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1467625375-31774-2-git-send-email-kraxel@redhat.com
2016-07-12 09:24:31 +02:00
Daniel P. Berrange
ea69744988 ui: avoid crash if vnc client disconnects with writes pending
The vnc_client_read() function is called from the vnc_client_io()
event handler callback when there is incoming data to process.
If it detects that the client has disconnected, then it will
trigger cleanup and free'ing of the VncState client struct at
a safe time.

Unfortunately, the vnc_client_io() event handler will also call
vnc_client_write() to handle any outgoing data writes. So if
vnc_client_io() was invoked with both G_IO_IN and G_IO_OUT
events set, and the client disconnects, we may try to write to
a client which has just been freed.

https://bugs.launchpad.net/qemu/+bug/1594861

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1467042529-3372-1-git-send-email-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-12 08:34:13 +02:00
Peter Lieven
095497ffc6 vnc-enc-tight: use thread local storage for palette
currently the color counting palette is allocated from heap, used and destroyed
for each single subrect. Use a static palette per thread for this purpose and
avoid the malloc and free for each update.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1467280846-9674-1-git-send-email-pl@kamp.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-12 08:34:13 +02:00
Gonglei
5a693efda8 vnc: fix incorrect checking condition when updating client
vs->disconnecting is set to TRUE and vs->ioc is closed, but
vs->ioc isn't set to NULL, so that the vnc_disconnect_finish()
isn't invoked when you update client in vnc_update_client()
after vnc_disconnect_start invoked. Let's using change the checking
condition to avoid resource leak.

Signed-off-by: Haibin Wang <wanghaibin.wang@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1467949056-81208-1-git-send-email-arei.gonglei@huawei.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-12 08:34:13 +02:00
Peter Maydell
f1ef557866 Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20160711' into staging
Last round of s390x patches for 2.7:
- A large update of the s390x PCI code, bringing it in line with
  the architecture
- Fixes and improvements in the ipl (boot) code
- Refactoring in the css code

# gpg: Signature made Mon 11 Jul 2016 09:04:51 BST
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20160711: (25 commits)
  s390x/pci: make hot-unplug handler smoother
  s390x/pci: replace fid with idx in msg data of msix
  s390x/pci: fix stpcifc_service_call
  s390x/pci: refactor list_pci
  s390x/pci: refactor s390_pci_find_dev_by_idx
  s390x/pci: add checkings in CLP_SET_PCI_FN
  s390x/pci: enable zpci hot-plug/hot-unplug
  s390x/pci: enable uid-checking
  s390x/pci: introduce S390PCIBusDevice qdev
  s390x/pci: introduce S390PCIIOMMU
  s390x/pci: introduce S390PCIBus
  s390x/pci: enforce zPCI state checking
  s390x/pci: refactor s390_pci_find_dev_by_fh
  s390x/pci: unify FH_ macros
  s390x/pci: write fid in CLP_QUERY_PCI_FN
  s390x/pci: acceleration for getting S390pciState
  s390x/pci: fix failures of dma map/unmap
  s390x/css: Unplug handler of virtual css bridge
  s390x/css: Factor out virtual css bridge and bus
  s390x/css: use define for "virtual-css-bridge" literal
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-11 18:46:38 +01:00
Peter Maydell
7de2cc8f78 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160708' into staging
two self-modifying code fixes

# gpg: Signature made Fri 08 Jul 2016 21:28:50 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160708:
  translate-all: Fix user-mode self-modifying code in 2 page long TB
  cputlb: Fix for self-modifying writes across page boundaries
  cputlb: Add address parameter to VICTIM_TLB_HIT
  cputlb: Move VICTIM_TLB_HIT out of line

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-11 17:17:02 +01:00
Peter Maydell
a91a4e7d8c Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
x86 and machine queue, 2016-07-07

Highlights:
* Improvements on global property error handling
* Translate -cpu options to global properties
* LMCE support

# gpg: Signature made Thu 07 Jul 2016 20:59:01 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  target-i386: Enable LMCE for '-cpu host' if supported by host
  target-i386: Publish advised value of MSR_IA32_FEATURE_CONTROL via fw_cfg
  target-i386: kvm: Add basic Intel LMCE support
  target-i386: Report hyperv feature words through qom
  target-i386: Show host and VM TSC frequencies on mismatch
  pc: Parse CPU features only once
  arm: virt: Parse cpu_model only once
  cpu: Use CPUClass->parse_features() as convertor to global properties
  target-i386: Avoid using locals outside their scope
  target-i386: TCG can support CPUID.07H:EBX.erms
  target-sparc: Use sparc_cpu_parse_features() directly
  vl: Set errp to &error_abort on machine compat_props
  machine: Add machine_register_compat_props() function
  qdev: GlobalProperty.errp field
  qdev: Eliminate qemu_add_globals() function
  qdev: Don't stop applying globals on first error

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-11 15:08:47 +01:00
Peter Maydell
b3b22db69f Merge remote-tracking branch 'remotes/rth/tags/pull-rth-20160710' into staging
build fix for travis

# gpg: Signature made Sun 10 Jul 2016 18:07:02 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-rth-20160710:
  build: Use $(AS) for optionrom explicitly
  linux-user: Fix i386 safe-syscall.S

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-11 14:10:09 +01:00
Gerd Hoffmann
f7d3f8c0c0 gtk: fix build
Commit "9d8256e virgl: pass whole GL scanout dimensions" missed the
opengl code path for gtk versions >= 3.16.  Update that one too and
fix the build with recent gtk versions.

Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1467876563-1351-1-git-send-email-kraxel@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-11 10:40:29 +01:00
Yi Min Zhao
93d16d81c8 s390x/pci: make hot-unplug handler smoother
The current implementation of hot-unplug handler is abrupt. Any pci
operation will be just rejected if pci device is unconfigured. Thus a
pci device can not be reset or destroyed in a right, smooth and safe
way.

Improve this as follows:
- Notify the guest via a HP_EVENT_DECONFIGURE_REQUEST(0x303) event in
  the unplug handler, giving it a chance to deconfigure the device via
  sclp and allowing us to continue hot-unplug afterwards.
- Set up a timer that will generate the HP_EVENT_CONFIGURE_TO_STBRES
  (0x304) event as before if the guest did not react after an adequate
  time.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
cdd85eb280 s390x/pci: replace fid with idx in msg data of msix
Present code uses fid as the part of message data of msix for looking
up the specific zpci device. However it limits the usable range of fid,
and the code looking up the zpci device may fail due to truncation of
the fid.

In addition, fh is composed of enabled bit, FH_VIRT and the array index.
So we can use the array index as the identifier to store in msg data.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
0a608a6e13 s390x/pci: fix stpcifc_service_call
Firstly the function misses dmaas checking. This patch adds it.

Secondly the function uses s390_pci_find_dev_by_fh() to look up the
zpci device. This may fail if the guest provides a valid and disabled
fh but fh of the associated zpci device is enabled. Thus we use
s390_pci_find_dev_by_idx() instead.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
4e3bfc167d s390x/pci: refactor list_pci
Because of the refactor of s390_pci_find_dev_by_idx(), list_pci()
should be updated. We introduce a new function to get the next
available zpci device. It simplifies the code of looking up zpci
devices.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
ab9746570a s390x/pci: refactor s390_pci_find_dev_by_idx
s390_find_dev_by_idx() only indexes usable zpci devices. It implies
that the index value of each zpci device is dynamic and may change if
a new zpci device is plugged. So we have to use a constant index to
look up the device.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
bd4976838d s390x/pci: add checkings in CLP_SET_PCI_FN
The code in CLP_SET_PCI_FN case misses some checkings. Let's add
them.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
af9ed379fc s390x/pci: enable zpci hot-plug/hot-unplug
We need to support hot-plug/hot-unplug for the new zpci devices as
well. This patch enables the present hot-plug/hot-unplug handlers
to support not only generic pci devices but also zpci devices.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
bf328399da s390x/pci: enable uid-checking
The uid-checking facility guarantees uniqueness of the uid within the
vm and exposes the real uid to the guest when listing pci devices.
Let's always enable it and present it to the guest in the response to
the list pci clp command.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
3e5cfba3ca s390x/pci: introduce S390PCIBusDevice qdev
To support definitions of s390 pci attributes in Qemu cmdline, we have
to make current S390PCIBusDevice struct inherit DeviceState and add
three properties for it. Currently we only support definitions of uid
and fid.

'uid' is optionally defined by users, identifies a zpci device and
must be defined with a 16-bit and non-zero unique value.

'fid' ranges from 0x0 to 0xFFFFFFFF. For fid property, we introduce a
new PropertyInfo by the name of s390_pci_fid_propinfo with our special
setter and getter. As 'fid' is optional, introduce 'fid_defined' to
track whether the user specified a fid.

'target' field is to direct qemu to find the corresponding generic PCI
device. It is equal to the 'id' value of one of generic pci devices.
If the user doesn't specify 'id' parameter for a generic pci device,
its 'id' value will be generated automatically and use this value as
'target' to create an associated zpci device.

If the user did not specify 'uid' or 'fid', values are generated
automatically. 'target' is required.

In addition, if a pci device has no associated zpci device, the code
will generate a zpci device automatically for it.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
67d5cd9722 s390x/pci: introduce S390PCIIOMMU
Currently each zpci device holds its own DMA address space and memory
region. At the same time, all instances of zpci device are stored in
S390pciState. So duirng the initialization of S390pciState, all zpci
devices are created and then all DMA address spaces are created. Thus,
when initializing pci devices, their corresponding DMA address spaces
could be found.

But zpci qdev will be introduced later. Zpci device may be initialized
and plugged afterwards generic pci device. So we should initialize all
DMA address spaces and memory regions before initializing zpci devices.

We introduce a new struct named S390PCIIOMMU. And a new field of
S390pciState, which is an array to store all instances of S390PCIIOMMU,
is added so that qemu pci code could find the corresponding DMA
address space when initializing a generic pci device. And this should
be done before the connection of a zpci device and a generic pci
device is built.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
90a0f9afec s390x/pci: introduce S390PCIBus
To enable S390PCIBusDevice as qdev, there should be a new bus to
plug and manage all instances of S390PCIBusDevice. Due to this,
S390PCIBus is introduced.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
5d1abf2344 s390x/pci: enforce zPCI state checking
Current code uses some fields combinatorially to indicate the state of
a s390 pci device. This patch introduces device states in order to make
the code more readable and more logical.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
06a96dae11 s390x/pci: refactor s390_pci_find_dev_by_fh
Because this function is called very frequently, we should use a more
effective way to find the zpci device. So we use the FH's index to get
the device directly.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
c188e30315 s390x/pci: unify FH_ macros
Present code uses some macros to structure PCI Function Handle. But
their names don't have a uniform format. Let's use FH_MASK_ as the
unified prefix.

While we're at it, differentiate the SHM bits: use different bits for
vfio and emulated devices.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
67aad508de s390x/pci: write fid in CLP_QUERY_PCI_FN
We forgot to write the fid; fix that.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
e7d336959b s390x/pci: acceleration for getting S390pciState
There are a number of places where the code needs to get the instance
of S390pciState. It calls object_resolve_path() every time. This
wastes a lot of time and leads to low performance. Thus we add
s390_get_phb() to improve it.

Because we always have a phb, we remove all return checkings in the
callers and add an assert in s390_get_phb() to make sure that phb is
getted successfully.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Yi Min Zhao
f7c40aa1e7 s390x/pci: fix failures of dma map/unmap
In commit d78c19b5cf, vfio code stores
the IOMMU's offset_within_address_space and adjusts the IOVA before
calling vfio_dma_map/vfio_dma_unmap. But s390_translate_iommu already
considers the base address of an IOMMU memory region.

Thus we use pal as the size and 0x0 as the base address to initialize
IOMMU memory subregion.

Signed-off-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Jing Liu
b804e8a62a s390x/css: Unplug handler of virtual css bridge
The previous patch moved virtual css bridge and bus out from
virtio-ccw, but kept the direct reference of virtio-ccw specific
unplug function inside css-bridge.c.

To make the virtual css bus and bridge useful for non-virtio devices,
this introduces a common unplug function pointer "unplug" to call
specific virtio-ccw unplug parts. Thus, the tight coupling to
virtio-ccw can be removed.

This unplug pointer is a member of CCWDeviceClass, which is introduced
as an abstract device layer called "ccw-device". This layer is between
DeviceState and specific devices which are plugged in virtual css bus,
like virtio-ccw device. The specific unplug handlers should be assigned
to "unplug" during initialization.

Signed-off-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Reviewed-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Reviewed-by: Yi Min Zhao <zyimin@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Jing Liu
dd70bd0d4c s390x/css: Factor out virtual css bridge and bus
Currently, common base layers virtual css bridge and bus are
defined in hw/s390x/virtio-ccw.c(h). In order to support
multiple types of devices in the virtual channel subsystem,
especially non virtio-ccw, refactoring work needs to be done.

This work is just a pure code move without any functional change
except dropping an empty function virtual_css_bridge_init() and
virtio_ccw_busdev_unplug() changing. virtio_ccw_busdev_unplug()
is specific to virtio-ccw but gets referenced from the common
virtual css bridge code. To keep the functional changes to a
minimum we export this function from virtio-ccw.c and continue
to reference it inside virtual_css_bridge_class_init()
(now living in hw/s390x/css-bridge.c). A follow-up patch will
clean this up.

Signed-off-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Reviewed-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Sascha Silbe
3f9e485964 s390x/css: use define for "virtual-css-bridge" literal
Introduce a TYPE_* define (like we already use for a couple of other
QOM types) for the name of the virtual CSS bridge QOM type instead of
sprinkling the same string literal over several source files.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Sascha Silbe
cf2499350a s390x/css: factor out some generic code from virtio_ccw_device_realize()
A lot of what virtio_ccw_device_realize() does isn't specific to
virtio; it would apply to emulated CCW as well. Factor it out to make
it easier to implement emulated CCW devices later on.

Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
David Hildenbrand
bb0995468a s390x/ipl: fix reboots for migration from different bios
When migrating from a different QEMU version, the start_address and
bios_start_address may differ. During migration these values are migrated
and overwrite the values that were detected by QEMU itself.

On a reboot, QEMU will reload its own BIOS, but use the migrated start
addresses, which does not work if the values differ.

Fix this by not relying on the migrated values anymore, but still
provide them during migration, so existing QEMUs continue to work.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Alexander Yarygin
e468b6730c s390x/ipl: Support IPL from selected SCSI device
If bootindex is specified for a device, we need to IPL from
it. Currently it works for ccw devices, but not for SCSI. To be able to
IPL from the specific device, pc-bios needs to know its address.
For this reason we add special QEMU_SCSI IPL type into the IPLB
structure, that contains the scsi device address.

We enhance the ipl block with a currently qemu-only parameter block
that allows us to specify a concrete scsi device.

Signed-off-by: Alexander Yarygin <yarygin@linux.vnet.ibm.com>
Reviewed-by: Eric Farman <farman@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Cornelia Huck
07c6f329bd pc-bios/s390-ccw.img: rebuild image
Contains:
- pc-bios/s390-ccw: Pass selected SCSI device to IPL

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Eugene (jno) Dvurechenski
b39b7718dc pc-bios/s390-ccw: Pass selected SCSI device to IPL
There is ,bootindex=%d argument to specify the lookup order of
boot devices.

If a bootindex assigned to the device, then IPL Parameter Info Block
is created for that device when it is IPLed from.

If it is a mere SCSI device (not FCP), then IPIB is created with a
special SCSI type and its fields are used to store SCSI address of the
device. This new ipl block is private to qemu for now.

If the device to IPL from is specified this way, then SCSI bus lookup
is bypassed and prescribed devices uses the address specified.

Signed-off-by: Eugene (jno) Dvurechenski <jno@linux.vnet.ibm.com>
Signed-off-by: Alexander Yarygin <yarygin@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2016-07-11 09:48:05 +02:00
Richard Henderson
cdbd727c20 build: Use $(AS) for optionrom explicitly
For clang before 3.5, -fno-integrated-as does not exist,
so the workaround in 5f6f0e27fb fails to build.

Use clang's default assembler for linux-user/safe-syscall.S,
and explicitly change to use the system assembler for the
option roms.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-10 10:05:46 -07:00
Stanislav Shmarov
7399a337e4 translate-all: Fix user-mode self-modifying code in 2 page long TB
In user-mode emulation Translation Block can consist of 2 guest pages.
In that case QEMU also mprotects 2 host pages that are dedicated for
guest memory, containing instructions. QEMU detects self-modifying code
with SEGFAULT signal processing.

In case if instruction in 1st page is modifying memory of 2nd
page (or vice versa) QEMU will mark 2nd page with PAGE_WRITE,
invalidate TB, generate new TB contatining 1 guest instruction and
exit to CPU loop. QEMU won't call mprotect, and new TB will cause
same SEGFAULT. Page will have both PAGE_WRITE_ORG and PAGE_WRITE
flags, so QEMU will handle the signal as guest binary problem,
and exit with guest SEGFAULT.

Solution is to do following: In case if current TB was invalidated
continue to invalidate TBs from remaining guest pages and mark pages
as PAGE_WRITE. After that disable host page protection with mprotect.
If current tb was invalidated longjmp to main loop. That is more
efficient, since we won't get SEGFAULT when executing new TB.

Reviewed-by: Sergey Fedorov <sergey.fedorov@linaro.org>
Signed-off-by: Stanislav Shmarov <snarpix@gmail.com>
Message-Id: <1467880392-1043630-1-git-send-email-snarpix@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-08 13:17:38 -07:00
Samuel Damashek
81daabaf7a cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.

In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.

Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.

The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.

Signed-off-by: Samuel Damashek <samuel.damashek@invincea.com>
Message-Id: <20160706182652.16190-1-samuel.damashek@invincea.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-08 13:05:07 -07:00
Samuel Damashek
a390284b80 cputlb: Add address parameter to VICTIM_TLB_HIT
[rth: Split out from the original patch.]

Signed-off-by: Samuel Damashek <samuel.damashek@invincea.com>
Message-Id: <20160706182652.16190-1-samuel.damashek@invincea.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-08 13:04:41 -07:00
Richard Henderson
7e9a7c50d9 cputlb: Move VICTIM_TLB_HIT out of line
There are currently 22 invocations of this function,
and we're about to increase that number.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-08 12:58:55 -07:00
Richard Henderson
4aa3f4dd5b linux-user: Fix i386 safe-syscall.S
Clang insists that "cmp" is ambiguous with a memory destination,
requiring an explicit size suffix.

There was a true error in the use of .cfi_def_cfa_offset in the
epilogue, but changing to use the proper .cfi_adjust_cfa_offset
runs afoul of a clang bug wrt .cfi_restore_state.  Better to
fold the two epilogues so that we don't trigger the bug.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-07 21:39:22 -07:00
Haozhong Zhang
40bfe48f1c target-i386: Enable LMCE for '-cpu host' if supported by host
If -cpu host is used, LMCE will be automatically enabled when it's
supported by host.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:40 -03:00
Haozhong Zhang
217f1b4a72 target-i386: Publish advised value of MSR_IA32_FEATURE_CONTROL via fw_cfg
It's a prerequisite that certain bits of MSR_IA32_FEATURE_CONTROL should
be set before some features (e.g. VMX and LMCE) can be used, which is
usually done by the firmware. This patch adds a fw_cfg file
"etc/msr_feature_control" which contains the advised value of
MSR_IA32_FEATURE_CONTROL and can be used by guest firmware (e.g. SeaBIOS).

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:31 -03:00
Ashok Raj
87f8b62604 target-i386: kvm: Add basic Intel LMCE support
This patch adds the support to inject SRAR and SRAO as LMCE, i.e. they
are injected to only one VCPU rather than broadcast to all VCPUs. As KVM
reports LMCE support on Intel platforms, this features is only available
on Intel platforms.

LMCE is disabled by default and can be enabled/disabled by cpu option
'lmce=on/off'.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
[Haozhong: Enable LMCE only on Intel platforms
           Disable LMCE by default and add a cpu option 'lmce'
           Handle the error if LMCE is enabled w/o host support
           Remove MCG_LMCE_P from MCE_CAP_DEF
           Add migration support for LMCE
           Minor code style changes]
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:16 -03:00
Evgeny Yakovlev
c35bd19a5c target-i386: Report hyperv feature words through qom
This change adds hyperv feature words report through qom rpc.

When VM is configured with hyperv features enabled
libvirt will check that required feature words are set
in cpuid leaf 40000003 through qom request.

Currently qemu does not report hyperv feature words
which prevents windows guests from starting with libvirt.

To avoid conflicting with current hyperv properties all added feature
words cannot be set directly with -cpu +feature yet.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Richard Henderson <rth@twiddle.net>
CC: Eduardo Habkost <ehabkost@redhat.com>
CC: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:13 -03:00
Eduardo Habkost
d6276d26bd target-i386: Show host and VM TSC frequencies on mismatch
Improve the TSC frequency mismatch warning to show the host and
VM TSC frequencies.

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:11 -03:00
Igor Mammedov
6aff24c6a6 pc: Parse CPU features only once
Considering that features are converted to global properties and
global properties are automatically applied to every new instance
of created CPU (at object_new() time), there is no point in
parsing cpu_model string every time a CPU created. So move
parsing outside CPU creation loop and do it only once.

Parsing also should be done before any CPU is created so that
features would affect the first CPU a well.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:06 -03:00
Igor Mammedov
09f71b054a arm: virt: Parse cpu_model only once
Considering that features are converted to global properties and
global properties are automatically applied to every new instance
of created CPU (at object_new() time), there is no point in
parsing cpu_model string every time a CPU created. So move
parsing outside CPU creation loop and do it only once.

Parsing also should be done before any CPU is created so that
features would affect the first CPU a well.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:05 -03:00
Igor Mammedov
62a48a2a57 cpu: Use CPUClass->parse_features() as convertor to global properties
Currently CPUClass->parse_features() is used to parse -cpu
features string and set properties on created CPU instances.

But considering that features specified by -cpu apply to every
created CPU instance, it doesn't make sense to parse the same
features string for every CPU created. It also makes every target
that cares about parsing features string explicitly call
CPUClass->parse_features() parser, which gets in a way if we
consider using generic device_add for CPU hotplug as device_add
has not a clue about CPU specific hooks.

Turns out we can use global properties mechanism to set
properties on every created CPU instance for a given type. That
way it's possible to convert CPU features into a set of global
properties for CPU type specified by -cpu cpu_model and common
Device.device_post_init() will apply them to CPU of given type
automatically regardless whether it's manually created CPU or CPU
created with help of device_add.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:01 -03:00
Paolo Bonzini
cf2887c973 target-i386: Avoid using locals outside their scope
x86_cpu_parse_featurestr has a "val = num;" assignment just before num
goes out of scope.  Push num up to fix the issue.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:25:00 -03:00
Paolo Bonzini
7eb24386db target-i386: TCG can support CPUID.07H:EBX.erms
ERMS just says "rep movsb" and "rep stosb" are fast.  It does not
imply any new instruction, so we can support it easily.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:59 -03:00
Igor Mammedov
fb02d56e96 target-sparc: Use sparc_cpu_parse_features() directly
Make SPARC target use sparc_cpu_parse_features() directly
so it won't get in the way of switching other propertified
targets to handling features as global properties.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:57 -03:00
Eduardo Habkost
adae837d40 vl: Set errp to &error_abort on machine compat_props
Use the new GlobalProperty.errp field to handle compat_props
errors.

Example output before this change:
(with an intentionally broken entry added to PC_COMPAT_1_3 just
for testing)

  $ qemu-system-x86_64 -machine pc-1.3
  qemu-system-x86_64: hw/core/qdev-properties.c:1091: qdev_prop_set_globals_for_type: Assertion `prop->user_provided' failed.
  Aborted (core dumped)

After:

  $ qemu-system-x86_64 -machine pc-1.3
  Unexpected error in x86_cpuid_set_vendor() at /home/ehabkost/rh/proj/virt/qemu/target-i386/cpu.c:1688:
  qemu-system-x86_64: can't apply global cpu.vendor=x: Property '.vendor' doesn't take value 'x'
  Aborted (core dumped)

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:55 -03:00
Eduardo Habkost
39a3b377b8 machine: Add machine_register_compat_props() function
Move the compat_props handling to core machine code.

Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:54 -03:00
Eduardo Habkost
77280adbdf qdev: GlobalProperty.errp field
The new field will allow error handling to be configured by
qdev_prop_register_global() callers: &error_fatal and
&error_abort can be used to make QEMU exit or abort if any errors
are reported when applying the properties.

While doing it, change the error message from "global %s.%s=%s
ignored" to "can't apply global %s.%s=%s".

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:52 -03:00
Eduardo Habkost
8d76bfe8f8 qdev: Eliminate qemu_add_globals() function
The function is just a helper to handle the -global options, it
can stay in vl.c like most qemu_opts_foreach() calls.

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:50 -03:00
Eduardo Habkost
823efc5d26 qdev: Don't stop applying globals on first error
qdev_prop_set_globals_for_type() stops applying global properties
on the first error. It is a leftover from when QEMU exited on any
error when applying global property. Commit 25f8dd9 changed the
fatal error to a warning, but neglected to drop the stopping.
Fix that.

For example, the following command-line will not set CPUID level
to 3, but will warn only about "x86_64-cpu.vendor" being ignored.

  $ ./x86_64-softmmu/qemu-system-x86_64 \
      -global x86_64-cpu.vendor=x \
      -global x86_64-cpu.level=3
  qemu-system-x86_64: Warning: global x86_64-cpu.vendor=x ignored: Property '.vendor' doesn't take value 'x'

Fix this by not returning from qdev_prop_set_globals_for_type()
on the first error.

Cc: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-07-07 15:24:47 -03:00
Peter Maydell
4f4a9ca4a4 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160707' into staging
target-arm queue:
 * fix a wrong variable type for A64 SYS_HEAPINFO semihosting call
 * xlnx_dp: fix iffy xlnx_dp_aux_push_tx_fifo
 * aux: fix break that wanted to break two levels out
 * aux: Rename aux.[ch] to auxbus.[ch] for the benefit of Windows
 * hw/block/m25p80: fix resource leak
 * i.MX: split the GPT timer implementation into per SOC definitions

# gpg: Signature made Thu 07 Jul 2016 14:48:09 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160707:
  i.MX: split the GPT timer implementation into per SOC definitions
  hw/block/m25p80: fix resource leak
  aux: Rename aux.[ch] to auxbus.[ch] for the benefit of Windows
  aux: fix break that wanted to break two levels out
  xlnx_dp: fix iffy xlnx_dp_aux_push_tx_fifo
  target-arm/arm-semi.c: In SYS_HEAPINFO use correct type for 'limit'

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-07 14:49:38 +01:00
Jean-Christophe Dubois
66542f6399 i.MX: split the GPT timer implementation into per SOC definitions
In various Freescale SOCs, the GPT timers can be configured to select
its input clock.

Depending on the SOC the set of available input clocks may vary.

The actual single GPT definition was no good enough and because of it
booting the sabrelite board with a i.MX6DL device tree would fail
because of an incorrect input clock definition for the i.MX6DL SOC.

This patch fixes the i.MX6DL boot failure by adding the ability to
define a different set of input clocks depending on the considered SOC.

A different class has been defined for i.MX25, i.MX31 and i.MX6 each with
its specific set of input clocks.

The patch has been tested by booting KZM, i.MX25 PDK, i.MX6Q sabrelite
and i.MX6DL sabrelite.

Signed-off-by: Jean-Christophe Dubois <jcd@tribudubois.net>
Message-id: 1467325619-8374-1-git-send-email-jcd@tribudubois.net
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: fixed spacing round '/' operator]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-07 13:47:01 +01:00
Shannon Zhao
eef9f19eea hw/block/m25p80: fix resource leak
These two are spot by Coverity 1357232 and 1357233.

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1467684998-12076-1-git-send-email-zhaoshenglong@huawei.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-07 13:47:01 +01:00
Peter Maydell
e0dadc1e9e aux: Rename aux.[ch] to auxbus.[ch] for the benefit of Windows
On Windows 'aux.*' is a reserved name and cannot be used for
filenames; see
  https://msdn.microsoft.com/en-gb/library/windows/desktop/aa365247(v=vs.85).aspx

This prevents cloning the QEMU git repo on Windows:

C:\Java\sources\kvm> git clone https://github.com/qemu/qemu.git
Cloning into 'qemu'...
remote: Counting objects: 279563, done.
remote: Total 279563 (delta 0), reused 0 (delta 0), pack-reused 279563R
Receiving objects: 100% (279563/279563), 122.45 MiB | 3.52 MiB/s, done.
Resolving deltas: 100% (221942/221942), done.
Checking connectivity... done.
error: unable to create file hw/misc/aux.c (No such file or directory)
error: unable to create file include/hw/misc/aux.h (No such file or directory)
Checking out files: 100% (4795/4795), done.
fatal: unable to checkout working tree
warning: Clone succeeded, but checkout failed.
You can inspect what was checked out with 'git status'
and retry the checkout with 'git checkout -f HEAD'

(bug https://bugs.launchpad.net/bugs/1595240)

Rename the offending files for the benefit of Windows.

Reported-by: Алексей Курган <akurgan@yandex.ru>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Wei Huang <wei@redhat.com>
Tested-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1467377145-32385-1-git-send-email-peter.maydell@linaro.org
2016-07-07 13:47:01 +01:00
Paolo Bonzini
5229f45bd9 aux: fix break that wanted to break two levels out
The last "ret = AUX_I2C_NACK;" is dead, because it is always overridden
by AUX_I2C_ACK.  What really the code wants is to jump out of the switch
statement, and a "return" will not cut it because it would omit a debug
printf.

Change the logic so that we can break out of the while loop.  For clarity,
hoist the bus->last_* assignments up, right after i2c_start_transfer.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-07 13:47:00 +01:00
Paolo Bonzini
bb14a1eda0 xlnx_dp: fix iffy xlnx_dp_aux_push_tx_fifo
xlnx_dp_aux_push_tx_fifo takes an immediate uint8_t and a buffer length,
which must be 1 because that is how many uint8_t's fit in a uint8_t.
Sure enough, that is what xlnx_dp_write passes to it, but the function
is just weird.  Therefore, make xlnx_dp_aux_push_tx_fifo look like
xlnx_dp_aux_push_rx_fifo, taking a pointer to the buffer.

Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-07 13:47:00 +01:00
Peter Maydell
90e26f5aac target-arm/arm-semi.c: In SYS_HEAPINFO use correct type for 'limit'
In commit f5666418c4 most of the SYS_HEAPINFO implementation was
fixed to use target_ulong rather than uint32_t, but the 'limit'
variable was not changed.

Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1467650942-28706-1-git-send-email-peter.maydell@linaro.org
2016-07-07 13:47:00 +01:00
Peter Maydell
5563168c53 Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Thu 07 Jul 2016 07:29:44 BST
# gpg:                using RSA key 0xEF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  tap: vhost busy polling support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-07 10:29:05 +01:00
Jason Wang
69e87b3268 tap: vhost busy polling support
This patch add the capability of basic vhost net busy polling which is
supported by recent kernel. User could configure the maximum number of
us that could be spent on busy polling through a new property of tap
"poll-us".

Cc: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-07-07 14:29:04 +08:00
Peter Maydell
91d3550990 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160706' into staging
misc updates

# gpg: Signature made Wed 06 Jul 2016 17:17:02 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160706:
  tcg: Improve the alignment check infrastructure
  tcg: Optimize spills of constants
  tcg: Fix name for high-half register
  build: Use $(CCAS) for compiling .S files

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-06 17:32:09 +01:00
Peter Maydell
0c56c6ab68 Merge remote-tracking branch 'remotes/spice/tags/pull-spice-20160706-1' into staging
spice and qxl bugfixes.

# gpg: Signature made Wed 06 Jul 2016 10:44:10 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/spice/tags/pull-spice-20160706-1:
  virgl: pass whole GL scanout dimensions
  spice: use the right head for multi-monitor
  virgl: count the calls to gl_block
  spice: avoid .set_mm_time on >= 0.12.6
  qxl: fix surface migration
  qxl: store memory region and offset instead of pointer for guest slots
  qxl: factor out qxl_get_check_slot_offset
  qxl: handle no updates in interface_update_area_complete
  qxl: use uint64_t for vram size

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-06 12:49:51 +01:00
Peter Maydell
975b1c3ac6 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-07-06' into staging
QAPI patches for 2016-07-06

# gpg: Signature made Wed 06 Jul 2016 10:00:51 BST
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2016-07-06:
  replay: Use new QAPI cloning
  sockets: Use new QAPI cloning
  qapi: Add new clone visitor
  qapi: Add new visit_complete() function
  tests: Factor out common code in qapi output tests
  tests: Clean up test-string-output-visitor
  qmp-output-visitor: Favor new visit_free() function
  string-output-visitor: Favor new visit_free() function
  qmp-input-visitor: Favor new visit_free() function
  string-input-visitor: Favor new visit_free() function
  opts-visitor: Favor new visit_free() function
  qapi: Add new visit_free() function
  qapi: Add parameter to visit_end_*
  qemu-img: Don't leak errors when outputting JSON
  qapi: Improve use of qmp/types.h

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-06 11:38:09 +01:00
Peter Maydell
fc5d0a2b24 Merge remote-tracking branch 'remotes/afaerber/tags/qom-devices-for-peter' into staging
QOM infrastructure fixes and device conversions

* Documentation fix

# gpg: Signature made Wed 06 Jul 2016 08:26:49 BST
# gpg:                using RSA key 0xFA2ED12D3E7E013F
# gpg: Good signature from "Andreas Färber <afaerber@suse.de>"
# gpg:                 aka "Andreas Färber <afaerber@suse.com>"
# Primary key fingerprint: 174F 0347 1BCC 221A 6175  6F96 FA2E D12D 3E7E 013F

* remotes/afaerber/tags/qom-devices-for-peter:
  qom: Fix comment typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-06 10:23:25 +01:00
Eric Blake
b6954712ab replay: Use new QAPI cloning
Rather than rolling our own clone via an expensive conversion
in and back out of QObject, use the new clone visitor.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-16-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
37f9e0a2b6 sockets: Use new QAPI cloning
Rather than rolling our own clone via an expensive conversion
in and back out of QObject, use the new clone visitor.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-15-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
a15fcc3cf6 qapi: Add new clone visitor
We have a couple places in the code base that want to deep-clone
one QAPI object into another, and they were resorting to serializing
the struct out to QObject then reparsing it.  A much more efficient
version can be done by adding a new clone visitor.

Since cloning is still relatively uncommon, expose the use of the
new visitor via a QAPI_CLONE() macro that takes care of type-punning
the underlying function pointer, rather than generating lots of
unused functions for types that won't be cloned.  And yes, we're
relying on the compiler treating all pointers equally, even though
a strict C program cannot portably do so - but we're not the first
one in the qemu code base to expect it to work (hello, glib!).

The choice of adding a fourth visitor type deserves some explanation.
On the surface, the clone visitor is mostly an input visitor (it
takes arbitrary input - in this case, another QAPI object - and
creates a new QAPI object during the course of the visit).  But
ever since commit da72ab0 consolidated enum visits based on the
visitor type, using VISITOR_INPUT would cause us to run
visit_type_str(), even though for cloning there is nothing to do
(we just copy the enum value across, without regards to its mapping
to strings).   Also, since our input happens to be a QAPI object,
we can also satisfy the internal checks for VISITOR_OUTPUT.  So in
the end, I settled with a new VISITOR_CLONE, and chose its value
such that many internal checks can use 'v->type & mask', sticking
to 'v->type == value' where the difference matters.

Note that we can only clone objects (including alternates) and lists,
not built-ins or enums.  The visitor core hides integer width from
the actual visitor (since commit 04e070d), and as long as that's the
case, we can't clone top-level integers.  Then again, those can
always be cloned by direct copy, since they are not objects with
deep pointers, so it's no real loss.  And restricting cloning to
just objects and lists is cleaner than restricting it to non-integers.
As such, I documented that the clone visitor is for direct use only
by code internal to QAPI, and should not be used on incomplete objects
(other than a hack to work around the fact that we allow NULL in place
of "" in visit_type_str() in other output visitors).  Note that as
written, the clone visitor will never fail on a complete object.

Scalars (including enums) not at the root of the clone copy just fine
with no additional effort while visiting the scalar, by virtue of a
g_memdup() each time we push another struct onto the stack.  Cloning
a string requires deduplication of a pointer, which means it can also
provide the guarantee of an input visitor of never producing NULL
even when still accepting NULL in place of "" the way the QMP output
visitor does.

Cloning an 'any' type could be possible by incrementing the QObject
refcnt, but it's not obvious whether that is better than implementing
a QObject deep clone.  So for now, we document it as unsupported,
and intentionally omit the .type_any() callback to let a developer
know their usage needs implementation.

Add testsuite coverage for several different clone situations, to
ensure that the code is working.  I also tested that valgrind was
happy with the test.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-14-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
3b098d5697 qapi: Add new visit_complete() function
Making each output visitor provide its own output collection
function was the only remaining reason for exposing visitor
sub-types to the rest of the code base.  Add a polymorphic
visit_complete() function which is a no-op for input visitors,
and which populates an opaque pointer for output visitors.  For
maximum type-safety, also add a parameter to the output visitor
constructors with a type-correct version of the output pointer,
and assert that the two uses match.

This approach was considered superior to either passing the
output parameter only during construction (action at a distance
during visit_free() feels awkward) or only during visit_complete()
(defeating type safety makes it easier to use incorrectly).

Most callers were function-local, and therefore a mechanical
conversion; the testsuite was a bit trickier, but the previous
cleanup patch minimized the churn here.

The visit_complete() function may be called at most once; doing
so lets us use transfer semantics rather than duplication or
ref-count semantics to get the just-built output back to the
caller, even though it means our behavior is not idempotent.

Generated code is simplified as follows for events:

|@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP
|     QDict *qmp;
|     Error *err = NULL;
|     QMPEventFuncEmit emit;
|-    QmpOutputVisitor *qov;
|+    QObject *obj;
|     Visitor *v;
|     q_obj_ACPI_DEVICE_OST_arg param = {
|         info
|@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP
|
|     qmp = qmp_event_build_dict("ACPI_DEVICE_OST");
|
|-    qov = qmp_output_visitor_new();
|-    v = qmp_output_get_visitor(qov);
|+    v = qmp_output_visitor_new(&obj);
|
|     visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err);
|     if (err) {
|@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP
|         goto out;
|     }
|
|-    qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov));
|+    visit_complete(v, &obj);
|+    qdict_put_obj(qmp, "data", obj);
|     emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err);

and for commands:

| {
|     Error *err = NULL;
|-    QmpOutputVisitor *qov = qmp_output_visitor_new();
|     Visitor *v;
|
|-    v = qmp_output_get_visitor(qov);
|+    v = qmp_output_visitor_new(ret_out);
|     visit_type_AddfdInfo(v, "unused", &ret_in, &err);
|-    if (err) {
|-        goto out;
|+    if (!err) {
|+        visit_complete(v, ret_out);
|     }
|-    *ret_out = qmp_output_get_qobject(qov);
|-
|-out:
|     error_propagate(errp, err);

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
23d1705f42 tests: Factor out common code in qapi output tests
Create a new visitor_get() function to capture common
actions taken in collecting output from an output visitor,
to make it easier to refactor the output visitors in a
later patch.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-12-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
a8fff94d28 tests: Clean up test-string-output-visitor
Use &error_abort and error_free_or_abort() in more places, use
the generated qapi_free_intList() instead of open-coding it,
reduce the scope of some variables, avoid code duplication
during test setup with visitor_output_setup_internal(), and
copy the visitor_reset() concept from the qmp-output test to
the string-output test.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-11-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
1830f22a67 qmp-output-visitor: Favor new visit_free() function
Now that we have a polymorphic visit_free(), we no longer need
qmp_output_visitor_cleanup(); however, we still need to
expose the subtype for qmp_output_get_qobject().

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-10-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
e7ca565629 string-output-visitor: Favor new visit_free() function
Now that we have a polymorphic visit_free(), we no longer need
string_output_visitor_cleanup(); however, we still need to
expose the subtype for string_output_get_string().

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-9-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
b70ce1018a qmp-input-visitor: Favor new visit_free() function
Now that we have a polymorphic visit_free(), we no longer need
qmp_input_visitor_cleanup(); which in turn means we no longer
need to return a subtype from qmp_input_visitor_new() nor a
public upcast function.

Generated code changes to qmp-marshal.c look like:

|@@ -52,11 +52,10 @@ void qmp_marshal_add_fd(QDict *args, QOb
| {
|     Error *err = NULL;
|     AddfdInfo *retval;
|-    QmpInputVisitor *qiv = qmp_input_visitor_new(QOBJECT(args), true);
|     Visitor *v;
|     q_obj_add_fd_arg arg = {0};
|
|-    v = qmp_input_get_visitor(qiv);
|+    v = qmp_input_visitor_new(QOBJECT(args), true);
|     visit_start_struct(v, NULL, NULL, 0, &err);
|     if (err) {
|         goto out;

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-8-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
7a0525c7be string-input-visitor: Favor new visit_free() function
Now that we have a polymorphic visit_free(), we no longer need
string_input_visitor_cleanup(); which in turn means we no longer
need to return a subtype from string_input_visitor_new() nor a
public upcast function.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-7-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
09204eac9b opts-visitor: Favor new visit_free() function
Now that we have a polymorphic visit_free(), we no longer need
opts_visitor_cleanup(); which in turn means we no longer need
to return a subtype from opts_visitor_new() nor a public upcast
function.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-6-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
2c0ef9f411 qapi: Add new visit_free() function
Making each visitor provide its own (awkwardly-named) FOO_cleanup()
is unusual, when we can instead have a polymorphic visit_free()
interface.  Over the next few patches, we can use the polymorphic
functions to eliminate the need for a FOO_get_visitor() function
for accessing specific visitor functionality, once everything can
be accessed directly through the Visitor* interfaces.

The dealloc visitor is the first one converted to completely use
the new entry point, since qapi_dealloc_visitor_cleanup() was the
only reason that qapi_dealloc_get_visitor() existed, and only
generated and testsuite code was even using it.  With the new
visit_free() entry point in place, we no longer need to expose
the QapiDeallocVisitor subtype through qapi_dealloc_visitor_new(),
and can get by with less generated code, with diffs that look like:

| void qapi_free_ACPIOSTInfo(ACPIOSTInfo *obj)
| {
|-    QapiDeallocVisitor *qdv;
|     Visitor *v;
|
|     if (!obj) {
|         return;
|     }
|
|-    qdv = qapi_dealloc_visitor_new();
|-    v = qapi_dealloc_get_visitor(qdv);
|+    v = qapi_dealloc_visitor_new();
|     visit_type_ACPIOSTInfo(v, NULL, &obj, NULL);
|-    qapi_dealloc_visitor_cleanup(qdv);
|+    visit_free(v);
|}

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-5-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
1158bb2a05 qapi: Add parameter to visit_end_*
Rather than making the dealloc visitor track of stack of pointers
remembered during visit_start_* in order to free them during
visit_end_*, it's a lot easier to just make all callers pass the
same pointer to visit_end_*.  The generated code has access to the
same pointer, while all other users are doing virtual walks and
can pass NULL.  The dealloc visitor is then greatly simplified.

All three visit_end_*() functions intentionally take a void**,
even though the visit_start_*() functions differ between void**,
GenericList**, and GenericAlternate**.  This is done for several
reasons: when doing a virtual walk, passing NULL doesn't care
what the type is, but when doing a generated walk, we already
have to cast the caller's specific FOO* to call visit_start,
while using void** lets us use visit_end without a cast. Also,
an upcoming patch will add a clone visitor that wants to use
the same implementation for all three visit_end callbacks,
which is made easier if all three share the same signature.

For visitors with already track per-object state (the QMP visitors
via a stack, and the string visitors which do not allow nesting),
add an assertion that the caller is indeed passing the same
pointer to paired calls.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-4-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
911ee36d41 qemu-img: Don't leak errors when outputting JSON
If our JSON output ever encounters an error, we would just silently
leak the error object.  Instead, assert that our usage won't fail.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-3-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:04 +02:00
Eric Blake
c7eb39cbd4 qapi: Improve use of qmp/types.h
'qjson.h' is not a QObject subtype; include this file directly in
.c files that are using it, rather than abusing qmp/types.h for
that purpose.

Meanwhile, for files that include a list of individual QObject
subtypes, it's easier to just use qmp/types.h for that purpose.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-2-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-06 10:52:03 +02:00
Marc-André Lureau
9d8256ebc0 virgl: pass whole GL scanout dimensions
Spice client needs the whole GL texture dimension to be able to show a
scanout with a monitor offset (different than +0+0).

Furthermore, this fixes a crash when calling surface_{width,height}()
after dpy_gfx_replace_surface(con, NULL) was called in
virgl_cmd_set_scanout()

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1465911849-30423-4-git-send-email-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-06 10:32:14 +02:00
Marc-André Lureau
c61d8126fc spice: use the right head for multi-monitor
Look up the associated head monitor config.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1465911849-30423-3-git-send-email-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-06 10:32:14 +02:00
Marc-André Lureau
c540128f93 virgl: count the calls to gl_block
In virgl_cmd_resource_flush(), when several consoles are updated, it
needs to keep blocking until all spice gl draws are done. This fixes an
assert() in spice when using multiple monitors with virgl.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1465911849-30423-2-git-send-email-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-06 10:32:14 +02:00
John Snow
015e02f880 spice: avoid .set_mm_time on >= 0.12.6
Spice deprecated this callback in 0.12.6.
It's not a problem yet, but it will cause Clang to fail in a -Werror
build due to the deprecated tag.

Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 1467240095-12507-2-git-send-email-jsnow@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-06 10:31:57 +02:00
Gerd Hoffmann
1331eab216 qxl: fix surface migration
Create a helper function qxl_dirty_one_surface() to mark a single qxl
surface as dirty.  Use the new qxl_get_check_slot_offset function and
lookup the memory region from the slot instead of assuming the surface
is stored in vram.

Use the new helper function in qxl_dirty_surfaces, for both primary and
off-screen surfaces.  For off-screen surfaces this is no functional
change.  For primary surfaces this will dirty only the memory actually
used instead of the whole surface0 region.  It will also work correctly
in case the guest places the primary surface in vram instead of the
surface0 region (linux kms driver does that).

https://bugzilla.redhat.com/show_bug.cgi?id=1235732

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466597244-5938-3-git-send-email-kraxel@redhat.com
2016-07-06 10:31:11 +02:00
Gerd Hoffmann
3cb5158f15 qxl: store memory region and offset instead of pointer for guest slots
Store MemoryRegion and offset instead of a pointer for each qxl memory
slot, so we can easily figure in which memory region an qxl object
stored.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466597244-5938-2-git-send-email-kraxel@redhat.com
2016-07-06 10:31:11 +02:00
Gerd Hoffmann
726bdf653a qxl: factor out qxl_get_check_slot_offset
New helper function which translates a qxl physical address into
memory slot and offset.  Also applies sanity checks.  Factored out
from qxl_phys2virt.  No functional change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1466597244-5938-1-git-send-email-kraxel@redhat.com
2016-07-06 10:31:11 +02:00
Gerd Hoffmann
2f5ae772c6 qxl: handle no updates in interface_update_area_complete
Simply return early in case there are no updated rects.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1465395101-13580-1-git-send-email-kraxel@redhat.com
2016-07-06 10:31:02 +02:00
Gerd Hoffmann
de1b9b85ef qxl: use uint64_t for vram size
This allows for the 64bit vram bar to become larger than 2G
(try -device qxl-vga,vram64_size_mb=8192).

https://bugzilla.redhat.com/show_bug.cgi?id=1340439

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1465389648-5179-1-git-send-email-kraxel@redhat.com
2016-07-06 10:30:50 +02:00
Changlong Xie
ada03a0e84 qom: Fix comment typo
It's qom_unref, not qdef_unref.

Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2016-07-06 09:19:35 +02:00
Sergey Sorokin
1f00b27f17 tcg: Improve the alignment check infrastructure
Some architectures (e.g. ARMv8) need the address which is aligned
to a size more than the size of the memory access.
To support such check it's enough the current costless alignment
check implementation in QEMU, but we need to support
an alignment size specifying.

Signed-off-by: Sergey Sorokin <afarallax@yandex.ru>
Message-Id: <1466705806-679898-1-git-send-email-afarallax@yandex.ru>
Signed-off-by: Richard Henderson <rth@twiddle.net>
[rth: Assert in tcg_canonicalize_memop.  Leave get_alignment_bits
available for, though unused by, user-mode.  Retain logging difference
based on ALIGNED_ONLY.]
2016-07-05 20:50:13 -07:00
Richard Henderson
59d7c14eef tcg: Optimize spills of constants
While we can store constants via constrants on INDEX_op_st_i32 et al,
we weren't able to spill constants to backing store.

Add a new backend interface, tcg_out_sti, which may store the constant
(and is allowed to fail).  Rearrange the temp_* helpers so that we only
attempt to directly store a constant when the temp is becoming dead/free.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-05 20:50:13 -07:00
Richard Henderson
120c1084ed tcg: Fix name for high-half register
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2016-07-05 20:50:12 -07:00
Richard Henderson
5f6f0e27fb build: Use $(CCAS) for compiling .S files
We fail to pass to $(AS) all of the different flags that may be required
for a given set of CFLAGS.  Rather than figuring out the host-specific
mapping, it's better to allow the compiler driver to do that.

However, simply using $(CC) runs afoul of clang trying to build the
option roms.  C.f. 3dd46c7852, wherein we changed from
using $(CC) to using $(AS) in the first place.

Work around this by passing -fno-integrated-as to clang, so that we use
the external assembler, and the clang driver still passes along all of
the options that the assembler might require.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1466703558-7723-1-git-send-email-rth@twiddle.net>
2016-07-05 20:50:11 -07:00
Peter Maydell
07bee7f4f4 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Tue 05 Jul 2016 16:46:14 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (43 commits)
  block/qcow2: Don't use cpu_to_*w()
  block: Convert bdrv_co_preadv/pwritev to BdrvChild
  block: Convert bdrv_prwv_co() to BdrvChild
  block: Convert bdrv_pwrite_zeroes() to BdrvChild
  block: Convert bdrv_pwrite(v/_sync) to BdrvChild
  block: Convert bdrv_pread(v) to BdrvChild
  block: Convert bdrv_write() to BdrvChild
  block: Convert bdrv_read() to BdrvChild
  block: Use BlockBackend for I/O in bdrv_commit()
  block: Move bdrv_commit() to block/commit.c
  block: Convert bdrv_co_do_readv/writev to BdrvChild
  block: Convert bdrv_aio_writev() to BdrvChild
  block: Convert bdrv_aio_readv() to BdrvChild
  block: Convert bdrv_co_writev() to BdrvChild
  block: Convert bdrv_co_readv() to BdrvChild
  vhdx: Some more BlockBackend use in vhdx_create()
  blkreplay: Convert to byte-based I/O
  vvfat: Use BdrvChild for s->qcow
  block/qdev: Fix NULL access when using BB twice
  block: fix return code for partial write for Linux AIO
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-05 17:53:02 +01:00
Peter Maydell
791b7d2340 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: new features, cleanups, fixes

iommus can not be added with -device.
cleanups and fixes all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 05 Jul 2016 11:18:32 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (30 commits)
  vmw_pvscsi: remove unnecessary internal msi state flag
  e1000e: remove unnecessary internal msi state flag
  vmxnet3: remove unnecessary internal msi state flag
  mptsas: remove unnecessary internal msi state flag
  megasas: remove unnecessary megasas_use_msi()
  pci: Convert msi_init() to Error and fix callers to check it
  pci bridge dev: change msi property type
  megasas: change msi/msix property type
  mptsas: change msi property type
  intel-hda: change msi property type
  usb xhci: change msi/msix property type
  change pvscsi_init_msi() type to void
  tests: add APIC.cphp and DSDT.cphp blobs
  tests: acpi: add CPU hotplug testcase
  log: Permit -dfilter 0..0xffffffffffffffff
  range: Replace internal representation of Range
  range: Eliminate direct Range member access
  log: Clean up misuse of Range for -dfilter
  pci_register_bar: cleanup
  Revert "virtio-net: unbreak self announcement and guest offloads after migration"
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-05 16:48:24 +01:00
Kevin Wolf
b0aaca4d7f Merge remote-tracking branch 'mreitz/tags/pull-block-for-kevin-2016-07-05-v2' into queue-block
A block patch for the block queue

# gpg: Signature made Tue Jul  5 16:54:22 2016 CEST
# gpg:                using RSA key 0x3BB14202E838ACAD
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40
#      Subkey fingerprint: 58B3 81CE 2DC8 9CF9 9730  EE64 3BB1 4202 E838 ACAD

* mreitz/tags/pull-block-for-kevin-2016-07-05-v2:
  block/qcow2: Don't use cpu_to_*w()

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:55:31 +02:00
Peter Maydell
f1f7a1ddf3 block/qcow2: Don't use cpu_to_*w()
Don't use the cpu_to_*w() functions, which we are trying to deprecate.
Instead either just use cpu_to_*() to do the byteswap, or use
st*_be_p() if we need to do the store somewhere other than to a
variable that's already the correct type.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1466093177-17890-1-git-send-email-peter.maydell@linaro.org
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
2016-07-05 16:54:04 +02:00
Kevin Wolf
a03ef88f77 block: Convert bdrv_co_preadv/pwritev to BdrvChild
This is the final patch for converting the common I/O path to take
a BdrvChild parameter instead of BlockDriverState.

The completion of this conversion means that all users that perform I/O
on an image need to actually hold a reference (in the form of BdrvChild,
possible as part of a BlockBackend) to that image. This also protects
against inconsistent use of BlockBackend vs. BlockDriverState functions
because direct use of a BlockDriverState isn't possible any more and
blk->root is private for block-backends.c.

In addition, we can now distinguish different users in the I/O path,
and the future op blockers work is going to add assertions based on
permissions stored in BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
e293b7a3df block: Convert bdrv_prwv_co() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
720ff280e7 block: Convert bdrv_pwrite_zeroes() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
d9ca2ea2e2 block: Convert bdrv_pwrite(v/_sync) to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
cf2ab8fc34 block: Convert bdrv_pread(v) to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
18d51c4bac block: Convert bdrv_write() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
fbcbbf4e80 block: Convert bdrv_read() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
f8e2bd538d block: Use BlockBackend for I/O in bdrv_commit()
Just like block jobs, the HMP commit command should use its own
BlockBackend for doing I/O on BlockDriverStates.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
83fd6dd3e7 block: Move bdrv_commit() to block/commit.c
No code changes, just moved from one file to another.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
adad6496c5 block: Convert bdrv_co_do_readv/writev to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:27 +02:00
Kevin Wolf
0d1049c7d1 block: Convert bdrv_aio_writev() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
ebb7af2173 block: Convert bdrv_aio_readv() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
25ec177d90 block: Convert bdrv_co_writev() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
28b04a8f65 block: Convert bdrv_co_readv() to BdrvChild
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
db1e80ee2e vhdx: Some more BlockBackend use in vhdx_create()
This does some easy conversions from bdrv_* to blk_* functions in
vhdx_create(). We should avoid bypassing the BlockBackend layer whenever
possible.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
e858a9705c blkreplay: Convert to byte-based I/O
The blkreplay driver only forwards the requests it gets, so converting
it to byte granularity is trivial.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
eecc77473b vvfat: Use BdrvChild for s->qcow
vvfat uses a temporary qcow file to cache written data in read-write
mode. In order to do things properly, this should show up in the BDS
graph and I/O should go through BdrvChild like for every other node.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Kevin Wolf
a9d52a7563 block/qdev: Fix NULL access when using BB twice
BlockBackend has only a single pointer to its guest device, so it makes
sure that only a single guest device is attached to it. device-add
returns an error if you try to attach a second device to a BB. In order
to make the error message nicer, -device that manually connects to a
if=none block device get a different message than -drive that implicitly
creates a guest device. The if=... option is stored in DriveInfo.

However, since blockdev-add exists, not every BlockBackend has a
DriveInfo any more. Check that it exists before we dereference it.

QMP reproducer resulting in a segfault:

{"execute":"blockdev-add","arguments":{"options":{"id":"disk","driver":"file","filename":"/tmp/test.img"}}}
{"execute":"device_add","arguments":{"driver":"virtio-blk-pci","drive":"disk"}}
{"execute":"device_add","arguments":{"driver":"virtio-blk-pci","drive":"disk"}}

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2016-07-05 16:46:26 +02:00
Denis V. Lunev
1c42f149dd block: fix return code for partial write for Linux AIO
Partial write most likely means that there is not space rather than
"something wrong happens". Thus it would be more natural to return
ENOSPC rather than EINVAL.

The problem actually happens with NBD server, which has reported EINVAL
rather then ENOSPC on the first error using its protocol, which makes
report to the user wrong.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Pavel Borzenkov <pborzenkov@virtuozzo.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:26 +02:00
Eric Blake
5411541270 block: Use bool as appropriate for BDS members
Using int for values that are only used as booleans is confusing.
While at it, rearrange a couple of members so that all the bools
are contiguous.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:26 +02:00
Eric Blake
8cc9c6e92b block: Fix error message style
error_setg() is not supposed to be used for multi-sentence
messages; tweak the message to append a hint instead.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:26 +02:00
Eric Blake
a5b8dd2ce8 block: Move request_alignment into BlockLimit
It makes more sense to have ALL block size limit constraints
in the same struct.  Improve the documentation while at it.

Simplify a couple of conditionals, now that we have audited and
documented that request_alignment is always non-zero.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:26 +02:00
Eric Blake
d9e0dfa246 block: Split bdrv_merge_limits() from bdrv_refresh_limits()
During bdrv_merge_limits(), we were computing initial limits
based on another BDS in two places.  At first glance, the two
computations are not identical (one is doing straight copying,
the other is doing merging towards or away from zero) - but
when you realize that the first round is starting with all-0
memory, all of the merging happens to work.  Factoring out the
merging makes it easier to track how two BDS limits are merged,
in case we have future reasons to merge in even more limits.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:26 +02:00
Eric Blake
ad82be2f4f block: Drop raw_refresh_limits()
The raw block driver was blindly copying all limits from bs->file,
even though: 1. the main bdrv_refresh_limits() already does this
for many of the limits, and 2. blindly copying from the children
can weaken any stricter limits that were already inherited from
the backing chain during the main bdrv_refresh_limits().  Also,
a future patch is about to move .request_alignment into
BlockLimits, and that is a limit that should NOT be copied from
other layers in the BDS chain.

Thus, we can completely drop raw_refresh_limits(), and rely on
the block layer setting up the proper limits.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
b9f7855a50 block: Switch discard length bounds to byte-based
Sector-based limits are awkward to think about; in our on-going
quest to move to byte-based interfaces, convert max_discard and
discard_alignment.  Rename them, using 'pdiscard' as an aid to
track which remaining discard interfaces need conversion, and so
that the compiler will help us catch the change in semantics
across any rebased code.  The BlockLimits type is now completely
byte-based; and in iscsi.c, sector_limits_lun2qemu() is no
longer needed.

pdiscard_alignment is made unsigned (we use power-of-2 alignments
as bitmasks, where unsigned is easier to think about) while
leaving max_pdiscard signed (since we still have an 'int'
interface); this is comparable to what commit cf081fc did for
write zeroes limits.  We may later want to make everything an
unsigned 64-bit limit - but that requires a bigger code audit.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
29cc6a6834 block: Wording tweaks to write zeroes limits
Improve the documentation of the write zeroes limits, to mention
additional constraints that drivers should observe.  Worth squashing
into commit cf081fca, if that hadn't been pushed already :)

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
5def6b80e1 block: Switch transfer length bounds to byte-based
Sector-based limits are awkward to think about; in our on-going
quest to move to byte-based interfaces, convert max_transfer_length
and opt_transfer_length.  Rename them (dropping the _length suffix)
so that the compiler will help us catch the change in semantics
across any rebased code, and improve the documentation.  Use unsigned
values, so that we don't have to worry about negative values and
so that bit-twiddling is easier; however, we are still constrained
by 2^31 of signed int in most APIs.

When a value comes from an external source (iscsi and raw-posix),
sanitize the results to ensure that opt_transfer is a power of 2.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
79ba8c986a block: Set default request_alignment during bdrv_refresh_limits()
We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Now that all drivers have been updated to supply an override
of request_alignment during their .bdrv_refresh_limits(), as
needed, the block layer itself can defer setting the default
alignment until part of the overall bdrv_refresh_limits().

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
a65064816d block: Set request_alignment during .bdrv_refresh_limits()
We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Add a .bdrv_refresh_limits() to all four of our legacy devices
that will always be sector-only (bochs, cloop, dmg, vvfat), in
spite of their recent conversion to expose a byte interface.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
2914a1de99 raw-win32: Set request_alignment during .bdrv_refresh_limits()
We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

In this case, raw_probe_alignment() already did what we needed,
so just fix its signature and wire it in correctly.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
a84178ccff qcow2: Set request_alignment during .bdrv_refresh_limits()
We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
c8b3b998e2 iscsi: Set request_alignment during .bdrv_refresh_limits()
We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
835db3ee7b blkdebug: Set request_alignment during .bdrv_refresh_limits()
We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Note that when the user does not provide "align", then we were
defaulting to bs->request_alignment - but at this stage in the
initialization, that was always 512.  We were also rejecting an
explicit "align":0 from the user; this patch now allows that,
as an explicit request for the default alignment (which may not
always be 512 in the future).

qemu-iotests 77 is particularly sensitive to the fact that we
can specify an artificial alignment override in blkdebug, and
that override must continue to work even when limits are
refreshed on an already open device.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
24ce9a2026 block: Give nonzero result to blk_get_max_transfer_length()
Making all callers special-case 0 as unlimited is awkward,
and we DO have a hard maximum of BDRV_REQUEST_MAX_SECTORS given
our current block layer API limits.

In the case of scsi, this means that we now always advertise a
limit to the guest, even in cases where the underlying layers
previously use 0 for no inherent limit beyond the block layer.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
efaf4781a9 scsi: Advertise limits by blocksize, not 512
s->blocksize may be larger than 512, in which case our
tweaks to max_xfer_len and opt_xfer_len must be scaled
appropriately.

CC: qemu-stable@nongnu.org
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
f9e95af0a6 iscsi: Advertise realistic limits to block layer
The function sector_limits_lun2qemu() returns a value in units of
the block layer's 512-byte sector, and can be as large as
0x40000000, which is much larger than the block layer's inherent
limit of BDRV_REQUEST_MAX_SECTORS.  The block layer already
handles '0' as a synonym to the inherent limit, and it is nicer
to return this value than it is to calculate an arbitrary
maximum, for two reasons: we want to ensure that the block layer
continues to special-case '0' as 'no limit beyond the inherent
limits'; and we want to be able to someday expand the block
layer to allow 64-bit limits, where auditing for uses of
BDRV_REQUEST_MAX_SECTORS will help us make sure we aren't
artificially constraining iscsi to old block layer limits.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:25 +02:00
Eric Blake
202204717a nbd: Advertise realistic limits to block layer
We were basing the advertisement of maximum discard and transfer
length off of UINT32_MAX, but since the rest of the block layer
has signed int limits on a transaction, nothing could ever reach
that maximum, and we risk overflowing an int once things are
converted to byte-based rather than sector-based limits.  What's
more, we DO have a much smaller limit: both the current kernel
and qemu-nbd have a hard limit of 32M on a read or write
transaction, and while they may also permit up to a full 32 bits
on a discard transaction, the upstream NBD protocol is proposing
wording that without any explicit advertisement otherwise,
clients should limit ALL requests to the same limits as read and
write, even though the other requests do not actually require as
many bytes across the wire.  So the better limit to tell the
block layer is 32M for both values.

Behavior doesn't actually change with this patch (the block layer
is currently ignoring the max_transfer advertisements); but when
that problem is fixed in a later series, this patch will prevent
the exposure of a latent bug.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:24 +02:00
Eric Blake
476b923c32 nbd: Allow larger requests
The NBD layer was breaking up request at a limit of 2040 sectors
(just under 1M) to cater to old qemu-nbd. But the server limit
was raised to 32M in commit 2d8214885 to match the kernel, more
than three years ago; and the upstream NBD Protocol is proposing
documentation that without any explicit communication to state
otherwise, a client should be able to safely assume that a 32M
transaction will work.  It is time to rely on the larger sizing,
and any downstream distro that cares about maximum
interoperability to older qemu-nbd servers can just tweak the
value of #define NBD_MAX_SECTORS.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:24 +02:00
Eric Blake
82524274ea block: Fix harmless off-by-one in bdrv_aligned_preadv()
If the amount of data to read ends exactly on the total size
of the bs, then we were wasting time creating a local qiov
to read the data in preparation for what would normally be
appending zeroes beyond the end, even though this corner case
has nothing further to do.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:24 +02:00
Eric Blake
a604fa2ba5 block: Document supported flags during bdrv_aligned_preadv()
We don't pass any flags on to drivers to handle.  Tighten an
assert to explain why we pass 0 to bdrv_driver_preadv(), and add
some comments on things to be aware of if we want to turn on
per-BDS BDRV_REQ_FUA support during reads in the future.  Also,
document that we may want to consider using unmap during
copy-on-read operations where the read is all zeroes.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:24 +02:00
Eric Blake
cff86b38ac block: Tighter assertions on bdrv_aligned_pwritev()
For symmetry with bdrv_aligned_preadv(), assert that the caller
really has aligned things properly. This requires adding an align
parameter, which is used now only in the new asserts, but will
come in handy in a later patch that adds auto-fragmentation to the
max transfer size, since that value need not always be a multiple
of the alignment, and therefore must be rounded down.

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:24 +02:00
Denis V. Lunev
cfef6a45c7 qemu-img: fix failed autotests
There are 9 iotests failed on Ubuntu 15.10 at the moment.
The problem is that options parsing in qemu-img is broken by the
following commit:
    commit 10985131e3
    Author: Denis V. Lunev <den@openvz.org>
    Date:   Fri Jun 17 17:44:13 2016 +0300
    qemu-img: move common options parsing before commands processing

This strange command line reports error
  ./qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1024
  qemu-img: Invalid image size specified!
while original code parses it successfully.

The problem is that getopt_long state should be reset. This could be done
using this assignment according to the manual:
    optind = 0

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Eric Blake <eblake@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-05 16:46:24 +02:00
Peter Maydell
60a0f1af07 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ipxe-20160704-1' into staging
ipxe: update submodule from 4e03af8ec to 041863191
e1000e+vmxnet3: add boot rom

# gpg: Signature made Mon 04 Jul 2016 07:25:46 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ipxe-20160704-1:
  build: add pc-bios to config-host.mak deps
  ipxe: add new roms to BLOBS
  ipxe: update prebuilt binaries
  vmxnet3: add boot rom
  e1000e: add boot rom
  ipxe: add vmxnet3 rom
  ipxe: add e1000e rom
  ipxe: update submodule from 4e03af8ec to 041863191

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-05 12:46:18 +01:00
Cao jin
269fe4c3ab vmw_pvscsi: remove unnecessary internal msi state flag
Internal flag msi_used is uncesessary, msi_uninit() could be called
directly, msi_enabled() is enough to check device msi state.

But for migration compatibility, keep the field in structure.

cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Dmitry Fleytman <dmitry@daynix.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
66bf7d58d8 e1000e: remove unnecessary internal msi state flag
Internal big flag E1000E_USE_MSI is unnecessary, also is the helper
function: e1000e_init_msi(), e1000e_cleanup_msi(), so, remove them all.

cc: Dmitry Fleytman <dmitry@daynix.com>
cc: Jason Wang <jasowang@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
1070048eae vmxnet3: remove unnecessary internal msi state flag
Internal flag msi_used is unnecessary, it has the same effect as msi_enabled().
msi_uninit() could be called directly without risk.

cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Dmitry Fleytman <dmitry@daynix.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
2e2aa31674 mptsas: remove unnecessary internal msi state flag
internal flag msi_in_use in unnecessary, msi_uninit() could be called
directly, and msi_enabled() is enough to check device msi state.

cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>
cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
afea4e1410 megasas: remove unnecessary megasas_use_msi()
megasas overwrites user configuration when msi_init fail to flag internal msi
state, which is unsuitable. megasa_use_msi() is unnecessary, we can call
msi_uninit() directly when unrealize, even no need to call msi_enabled() first.

cc: Hannes Reinecke <hare@suse.de>
cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>

Acked-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
1108b2f8a9 pci: Convert msi_init() to Error and fix callers to check it
msi_init() reports errors with error_report(), which is wrong
when it's used in realize().

Fix by converting it to Error.

Fix its callers to handle failure instead of ignoring it.

For those callers who don't handle the failure, it might happen:
when user want msi on, but he doesn't get what he want because of
msi_init fails silently.

cc: Gerd Hoffmann <kraxel@redhat.com>
cc: John Snow <jsnow@redhat.com>
cc: Dmitry Fleytman <dmitry@daynix.com>
cc: Jason Wang <jasowang@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>
cc: Hannes Reinecke <hare@suse.de>
cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Alex Williamson <alex.williamson@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
2016-07-05 13:14:41 +03:00
Cao jin
69b205bb0b pci bridge dev: change msi property type
>From bit to enum OnOffAuto.

cc: Michael S. Tsirkin <mst@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
b4b4a57fa6 megasas: change msi/msix property type
>From bit to enum OnOffAuto.

cc: Hannes Reinecke <hare@suse.de>
cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
2016-07-05 13:14:41 +03:00
Cao jin
444dd1af1c mptsas: change msi property type
>From uint32 to enum OnOffAuto, and give it a shorter name.

cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
c0f2abff73 intel-hda: change msi property type
>From uint32 to enum OnOffAuto.

cc: Gerd Hoffmann <kraxel@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
290fd20db6 usb xhci: change msi/msix property type
>From bit to enum OnOffAuto

cc: Gerd Hoffmann <kraxel@redhat.com>
cc: Michael S. Tsirkin <mst@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Cao jin
b2e1fffb5a change pvscsi_init_msi() type to void
Nobody use its return value, so change the type to void.

cc: Michael S. Tsirkin <mst@redhat.com>
cc: Paolo Bonzini <pbonzini@redhat.com>
cc: Markus Armbruster <armbru@redhat.com>
cc: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-05 13:14:41 +03:00
Peter Maydell
8662d7db39 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160705' into staging
ppc patch queue for 2016-07-05

Here's the current ppc, sPAPR and related drivers patch queue.

  * The big addition is dynamic DMA window support (this includes some
    core VFIO changes)
  * There are also several fixes to the MMU emulation for bugs
    introduced with the HV mode patches
  * Several other bugfixes and cleanups

Changes in v2:
  I messed up and forgot to make a fix in the last patch which BenH
  pointed out (introduced by my rebasing).  That's fixed in this
  version, and I'm replacing the tag in place with the revised
  version.

# gpg: Signature made Tue 05 Jul 2016 06:28:58 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160705:
  ppc/hash64: Fix support for LPCR:ISL
  ppc/hash64: Add proper real mode translation support
  target-ppc: Return page shift from PTEG search
  target-ppc: Simplify HPTE matching
  target-ppc: Correct page size decoding in ppc_hash64_pteg_search()
  ppc: simplify ppc_hash64_hpte_page_shift_noslb()
  spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)
  vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)
  vfio: Add host side DMA window capabilities
  vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)
  spapr_iommu: Realloc guest visible TCE table when starting/stopping listening
  ppc: simplify max_smt initialization in ppc_cpu_realizefn()
  spapr: Ensure thread0 of CPU core is always realized first
  ppc: Fix xsrdpi, xvrdpi and xvrspi rounding

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-05 11:14:27 +01:00
Benjamin Herrenschmidt
2c7ad80443 ppc/hash64: Fix support for LPCR:ISL
We need to ignore the segment page size and essentially treat
all pages as coming from a 4K segment.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[dwg: Adjusted for differences in my version of the prereq patches]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 15:18:26 +10:00
Benjamin Herrenschmidt
912acdf487 ppc/hash64: Add proper real mode translation support
This adds proper support for translating real mode addresses based
on the combination of HV and LPCR bits. This handles HRMOR offset
for hypervisor real mode, and both RMA and VRMA modes for guest
real mode. PAPR mode adjusts the offsets appropriately to match the
RMA used in TCG, but we need to limit to the max supported by the
implementation (16G).

This includes some fixes by Cédric Le Goater <clg@kaod.org>

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[dwg: Adjusted for differences in my version of the prereq patches]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 14:31:08 +10:00
David Gibson
949868633f target-ppc: Return page shift from PTEG search
ppc_hash64_pteg_search() now decodes a PTEs page size encoding, which it
didn't previously do.  This means we're now double decoding the page size
because we check it int he fault path after ppc64_hash64_htab_lookup()
returns.

To avoid this duplication have ppc_hash64_pteg_search() and
ppc_hash64_htab_lookup() return the page size from the PTE and use that in
the callers instead of decoding again.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2016-07-05 14:31:08 +10:00
David Gibson
073de86aa9 target-ppc: Simplify HPTE matching
ppc_hash64_pteg_search() explicitly checks each HPTE's VALID and
SECONDARY bits, then uses the HPTE64_V_COMPARE() macro to check the B field
and AVPN.  However, a small tweak to HPTE64_V_COMPARE() means we can check
all of these bits at once with a suitable ptem value.  So, consolidate all
the comparisons for simplicity.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2016-07-05 14:31:08 +10:00
David Gibson
651060aba7 target-ppc: Correct page size decoding in ppc_hash64_pteg_search()
The architecture specifies that when searching a PTEG for PTEs, entries
with a page size encoding that's not valid for the current segment should
be ignored, continuing the search.

The current implementation does this with ppc_hash64_pte_size_decode()
which is a very incomplete implementation of this check.  We already have
code to do a full and correct page size decode in hpte_page_shift().

This patch moves hpte_page_shift() so it can be used in
ppc_hash64_pteg_search() and adjusts the latter's parameters to include
a full SLBE instead of just a segment page shift.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2016-07-05 14:31:08 +10:00
Cédric Le Goater
1f0252e66e ppc: simplify ppc_hash64_hpte_page_shift_noslb()
The segment page shift parameter is never used. Let's remove it.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 14:31:08 +10:00
Alexey Kardashevskiy
ae4de14cd3 spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)
This adds support for Dynamic DMA Windows (DDW) option defined by
the SPAPR specification which allows to have additional DMA window(s)

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.6 machine and older disable it.
This also creates a single DMA window for the older machines to
maintain backward migration.

This implements DDW for PHB with emulated and VFIO devices. The host
kernel support is required. The advertised IOMMU page sizes are 4K and
64K; 16M pages are supported but not advertised by default, in order to
enable them, the user has to specify "pgsz" property for PHB and
enable huge pages for RAM.

The existing linux guests try creating one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and by default
set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs
and updates all references to dma_liobn. However this does not add
64bit LIOBN to the migration stream as in fact even 32bit LIOBN is
rather pointless there (as it is a PHB property and the management
software can/should pass LIOBNs via CLI) but we keep it for the backward
migration support.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 14:31:08 +10:00
Alexey Kardashevskiy
2e4109de8e vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)
New VFIO_SPAPR_TCE_v2_IOMMU type supports dynamic DMA window management.
This adds ability to VFIO common code to dynamically allocate/remove
DMA windows in the host kernel when new VFIO container is added/removed.

This adds a helper to vfio_listener_region_add which makes
VFIO_IOMMU_SPAPR_TCE_CREATE ioctl and adds just created IOMMU into
the host IOMMU list; the opposite action is taken in
vfio_listener_region_del.

When creating a new window, this uses heuristic to decide on the TCE table
levels number.

This should cause no guest visible change in behavior.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
[dwg: Added some casts to prevent printf() warnings on certain targets
 where the kernel headers' __u64 doesn't match uint64_t or PRIx64]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 14:31:08 +10:00
Alexey Kardashevskiy
f4ec5e26ed vfio: Add host side DMA window capabilities
There are going to be multiple IOMMUs per a container. This moves
the single host IOMMU parameter set to a list of VFIOHostDMAWindow.

This should cause no behavioral change and will be used later by
the SPAPR TCE IOMMU v2 which will also add a vfio_host_win_del() helper.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 14:31:08 +10:00
Alexey Kardashevskiy
318f67ce13 vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)
This makes use of the new "memory registering" feature. The idea is
to provide the userspace ability to notify the host kernel about pages
which are going to be used for DMA. Having this information, the host
kernel can pin them all once per user process, do locked pages
accounting (once) and not spent time on doing that in real time with
possible failures which cannot be handled nicely in some cases.

This adds a prereg memory listener which listens on address_space_memory
and notifies a VFIO container about memory which needs to be
pinned/unpinned. VFIO MMIO regions (i.e. "skip dump" regions) are skipped.

The feature is only enabled for SPAPR IOMMU v2. The host kernel changes
are required. Since v2 does not need/support VFIO_IOMMU_ENABLE, this does
not call it when v2 is detected and enabled.

This enforces guest RAM blocks to be host page size aligned; however
this is not new as KVM already requires memory slots to be host page
size aligned.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[dwg: Fix compile error on 32-bit host]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 14:30:54 +10:00
Alexey Kardashevskiy
606b54986d spapr_iommu: Realloc guest visible TCE table when starting/stopping listening
The sPAPR TCE tables manage 2 copies when VFIO is using an IOMMU -
a guest view of the table and a hardware TCE table. If there is no VFIO
presense in the address space, then just the guest view is used, if
this is the case, it is allocated in the KVM. However since there is no
support yet for VFIO in KVM TCE hypercalls, when we start using VFIO,
we need to move the guest view from KVM to the userspace; and we need
to do this for every IOMMU on a bus with VFIO devices.

This implements the callbacks for the sPAPR IOMMU - notify_started()
reallocated the guest view to the user space, notify_stopped() does
the opposite.

This removes explicit spapr_tce_set_need_vfio() call from PCI hotplug
path as the new callbacks do this better - they notify IOMMU at
the exact moment when the configuration is changed, and this also
includes the case of PCI hot unplug.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 10:43:02 +10:00
Greg Kurz
c4e6c42353 ppc: simplify max_smt initialization in ppc_cpu_realizefn()
kvmppc_smt_threads() returns 1 if KVM is not enabled.

Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 10:43:02 +10:00
Bharata B Rao
7093645a84 spapr: Ensure thread0 of CPU core is always realized first
During CPU core realization, we create all the thread objects and parent
them to the core object in a loop. However, the realization of thread
objects is done separately by walking the threads of a core using
object_child_foreach(). With this, there is no guarantee on the order
in which the child thread objects get realized. Since CPU device tree
properties are currently derived from the CPU thread object, we assume
thread0 of the core to be the representative thread of the core when
creating device tree properties for the core. If thread0 is not the
first thread that gets realized, then we would end up having an
incorrect dt_id for the core and this causes hotplug failures from
the guest.

Fix this by realizing each thread object by walking the core's thread
object list thereby ensuring that thread0 and other threads are always
realized in the correct order.

Future TODO: CPU DT nodes are per-core properties and we should
ideally base the creation of CPU DT nodes on core objects rather than
the thread objects.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 10:43:02 +10:00
Anton Blanchard
158c87e5de ppc: Fix xsrdpi, xvrdpi and xvrspi rounding
xsrdpi, xvrdpi and xvrspi use the round ties away method, not round
nearest even.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-07-05 10:43:02 +10:00
Igor Mammedov
600426f2df tests: add APIC.cphp and DSDT.cphp blobs
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 19:43:33 +03:00
Peter Maydell
1165942311 Merge remote-tracking branch 'remotes/kraxel/tags/pull-seabios-20160704-3' into staging
Revert "bios: Add fast variant of SeaBIOS for use with -kernel on x86."

# gpg: Signature made Mon 04 Jul 2016 16:24:55 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-seabios-20160704-3:
  Revert "bios: Add fast variant of SeaBIOS for use with -kernel on x86."

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 17:27:54 +01:00
Peter Maydell
0d7e96c9b5 Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2016-07-04-1' into staging
Merge qcrypto 2016/07/04 v1

# gpg: Signature made Mon 04 Jul 2016 15:54:26 BST
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2016-07-04-1:
  crypto: allow default TLS priority to be chosen at build time
  crypto: add support for TLS priority string override
  crypto: implement sha224, sha384, sha512 and ripemd160 hashes
  crypto: switch hash code to use nettle/gcrypt directly
  crypto: rename OUT to out in xts test to avoid clash on MinGW
  crypto: fix handling of iv generator hash defaults

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 16:28:58 +01:00
Gerd Hoffmann
3b1154fff1 Revert "bios: Add fast variant of SeaBIOS for use with -kernel on x86."
This reverts commit 4e04ab6a63.

Also remove pc-bios/bios-fast.bin.

Commit was merged by mistake.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-04 17:23:33 +02:00
Daniel P. Berrange
a1c5e949dd crypto: allow default TLS priority to be chosen at build time
Modern gnutls can use a global config file to control the
crypto priority settings for TLS connections. For example
the priority string "@SYSTEM" instructs gnutls to find the
priority setting named "SYSTEM" in the global config file.

Latest gnutls GIT codebase gained the ability to reference
multiple priority strings in the config file, with the first
one that is found to existing winning. This means it is now
possible to configure QEMU out of the box with a default
priority of "@QEMU,SYSTEM", which says to look for the
settings "QEMU" first, and if not found, use the "SYSTEM"
settings.

To make use of this facility, we introduce the ability to
set the QEMU default priority at build time via a new
configure argument.  It is anticipated that distro vendors
will set this when building QEMU to a suitable value for
use with distro crypto policy setup. eg current Fedora
would run

 ./configure --tls-priority=@SYSTEM

while future Fedora would run

 ./configure --tls-priority=@QEMU,SYSTEM

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-04 15:53:19 +01:00
Daniel P. Berrange
13f12430d4 crypto: add support for TLS priority string override
The gnutls default priority is either "NORMAL" (most historical
versions of gnutls) which is a built-in label in gnutls code,
or "@SYSTEM" (latest gnutls on Fedora at least) which refers
to an admin customizable entry in a gnutls config file.

Regardless of which default is used by a distro, they are both
global defaults applying to all applications using gnutls. If
a single application on the system needs to use a weaker set
of crypto priorities, this potentially forces the weakness onto
all applications. Or conversely if a single application wants a
strong default than all others, it can't do this via the global
config file.

This adds an extra parameter to the tls credential object which
allows the mgmt app / user to explicitly provide a priority
string to QEMU when configuring TLS.

For example, to use the "NORMAL" priority, but disable SSL 3.0
one can now configure QEMU thus:

  $QEMU -object tls-creds-x509,id=tls0,dir=/home/berrange/qemutls,\
                priority="NORMAL:-VERS-SSL3.0" \
        ..other args...

If creating tls-creds-anon, whatever priority the user specifies
will always have "+ANON-DH" appended to it, since that's mandatory
to make the anonymous credentials work.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-04 15:52:43 +01:00
Daniel P. Berrange
9164b89762 crypto: implement sha224, sha384, sha512 and ripemd160 hashes
Wire up the nettle and gcrypt hash backends so that they can
support the sha224, sha384, sha512 and ripemd160 hash algorithms.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-04 15:52:36 +01:00
Igor Mammedov
6b9c1dd2cd tests: acpi: add CPU hotplug testcase
Test with:

    -smp 2,cores=3,sockets=2,maxcpus=6

to capture sparse APIC ID values that default
AMD CPU has in above configuration.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 16:49:34 +03:00
Markus Armbruster
58eeb83cc7 log: Permit -dfilter 0..0xffffffffffffffff
Works fine since the previous commit fixed the underlying range data
type.  Of course it filters out nothing, but so does
0..1,2..0xffffffffffffffff, and we don't bother rejecting that either.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 16:49:33 +03:00
Markus Armbruster
6dd726a2bf range: Replace internal representation of Range
Range represents a range as follows.  Member @start is the inclusive
lower bound, member @end is the exclusive upper bound.  Zero @end is
special: if @start is also zero, the range is empty, else @end is to
be interpreted as 2^64.  No other empty ranges may occur.

The range [0,2^64-1] cannot be represented.  If you try to create it
with range_set_bounds1(), you get the empty range instead.  If you try
to create it with range_set_bounds() or range_extend(), assertions
fail.  Before range_set_bounds() existed, the open-coded creation
usually got you the empty range instead.  Open deathtrap.

Moreover, the code dealing with the janus-faced @end is too clever by
half.

Dumb this down to a more pedestrian representation: members @lob and
@upb are inclusive lower and upper bounds.  The empty range is encoded
as @lob = 1, @upb = 0.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 16:49:33 +03:00
Markus Armbruster
a0efbf1660 range: Eliminate direct Range member access
Users of struct Range mess liberally with its members, which makes
refactoring hard.  Create a set of methods, and convert all users to
call them instead of accessing members.  The methods have carefully
worded contracts, and use assertions to check them.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 16:49:33 +03:00
Markus Armbruster
58e19e6e79 log: Clean up misuse of Range for -dfilter
Range encodes an integer interval [a,b] as { begin = a, end = b + 1 },
where a \in [0,2^64-1] and b \in [1,2^64].  Thus, zero end is to be
interpreted as 2^64.

The implementation of -dfilter (commit 3514552) uses Range
differently: it encodes [a,b] as { begin = a, end = b }.  The code
works, but it contradicts the specification of Range in range.h.

Switch to the specified representation.  Since it can't represent
[0,UINT64_MAX], we have to reject that now.  Add a test for it.

While we're rejecting anyway: observe that we reject -dfilter LOB..UPB
where LOB > UPB when UPB is zero, but happily create an empty Range
when it isn't.  Reject it then, too, and add a test for it.

While there, add a positive test for the problematic upper bound
UINT64_MAX.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 16:49:33 +03:00
Cao jin
5178ecd863 pci_register_bar: cleanup
place relevant code tegother, make the code easier to read

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
2016-07-04 16:49:33 +03:00
Peter Maydell
3173a1fd54 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160704' into staging
target-arm queue:
 * fix semihosting SYS_HEAPINFO call for A64 guests
 * fix crash if guest tries to write to ROM on imx boards
 * armv7m_nvic: fix crash for debugger reads from some registers
 * virt: mark PCIe host controller as dma-coherent in the DT
 * add data-driven register API
 * Xilinx Zynq: add devcfg device model
 * m25p80: fix various bugs
 * ast2400: add SMC controllers and SPI flash slaves

# gpg: Signature made Mon 04 Jul 2016 13:17:34 BST
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20160704: (23 commits)
  ast2400: create SPI flash slaves
  ast2400: add SPI flash slaves
  ast2400: add SMC controllers (FMC and SPI)
  m25p80: qdev-ify drive property
  m25p80: change cur_addr to 32 bit integer
  m25p80: avoid out of bounds accesses
  m25p80: do not put iovec on the stack
  ssi: change ssi_slave_init to be a realize ops
  xilinx_zynq: Connect devcfg to the Zynq machine model
  dma: Add Xilinx Zynq devcfg device model
  register: Add block initialise helper
  register: QOMify
  register: Define REG and FIELD macros
  register: Add Memory API glue
  register: Add Register API
  bitops: Add MAKE_64BIT_MASK macro
  hw/arm/virt: mark the PCIe host controller as DMA coherent in the DT
  armv7m_nvic: Use qemu_get_cpu(0) instead of current_cpu
  memory: Assert that memory_region_init_rom_device() ops aren't NULL
  imx: Use memory_region_init_rom() for ROMs
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 14:33:05 +01:00
Peter Maydell
9b9611c85d Merge remote-tracking branch 'remotes/kraxel/tags/pull-seabios-20160704-1' into staging
seabios: update from 1.9.1 to 1.9.3

# gpg: Signature made Mon 04 Jul 2016 10:29:47 BST
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-seabios-20160704-1:
  seabios: update binaries from 1.9.1 to 1.9.3
  seabios: update 128k config
  bios: Add fast variant of SeaBIOS for use with -kernel on x86.
  seabios: update submodule from 1.9.1 to 1.9.3

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:39:31 +01:00
Cédric Le Goater
e1ad9bc405 ast2400: create SPI flash slaves
A set of SPI flash slaves is attached under the flash controllers of
the palmetto platform. "n25q256a" flash modules are used for the BMC
and "mx25l25635e" for the host. These types are common in the
OpenPower ecosystem.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-9-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Cédric Le Goater
924ed16386 ast2400: add SPI flash slaves
Each controller on the ast2400 has a memory range on which it maps its
flash module slaves. Each slave is assigned a memory segment for its
mapping that can be changed at bootime with the Segment Address
Register. This is not supported in the current implementation so we
are using the defaults provided by the specs.

Each SPI flash slave can then be accessed in two modes: Command and
User. When in User mode, accesses to the memory segment of the slaves
are translated in SPI transfers. When in Command mode, the HW
generates the SPI commands automatically and the memory segment is
accessed as if doing a MMIO. Other SPI controllers call that mode
linear addressing mode.

For this purpose, we are adding below each crontoller an array of
structs gathering for each SPI flash module, a segment rank, a
MemoryRegion to handle the memory accesses and the associated SPI
slave device, which should be a m25p80.

Only the User mode is supported for now but we are preparing ground
for the Command mode. The framework is sufficient to support Linux.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-8-git-send-email-clg@kaod.org
[PMM: Use g_new0() rather than g_malloc0()]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Cédric Le Goater
7c1c69bca4 ast2400: add SMC controllers (FMC and SPI)
The Aspeed AST2400 soc includes a static memory controller for the BMC
which supports NOR, NAND and SPI flash memory modules. This controller
has two modes : the SMC for the legacy interface which supports only
one module and the FMC for the new interface which supports up to five
modules. The AST2400 also includes a SPI only controller used for the
host firmware, commonly called BIOS on Intel. It can be used in three
mode : a SPI master, SPI slave and SPI pass-through

Below is the initial framework for the SMC controller (FMC mode only)
and the SPI controller: the sysbus object, MMIO for registers
configuration and controls. Each controller has a SPI bus and a
configurable number of CS lines for SPI flash slaves.

The differences between the controllers are small, so they are
abstracted using indirections on the register numbers.

Only SPI flash modules are supported.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-7-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: added one missing error_propagate]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Paolo Bonzini
73bce5187b m25p80: qdev-ify drive property
This allows specifying the property via -drive if=none and creating
the flash device with -device.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-6-git-send-email-clg@kaod.org
[clg: added an extra fix for sabrelite_init()
      keeping the test on flash_dev did not seem necessary. ]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Paolo Bonzini
b7f480c3f6 m25p80: change cur_addr to 32 bit integer
The maximum amount of storage that can be addressed by the m25p80 command
set is 4 GiB.  However, cur_addr is currently a 64-bit integer.  To avoid
further problems related to sign extension of signed 32-bit integer
expressions, change cur_addr to a 32 bit integer.  Preserve migration
format by adding a dummy 4-byte field in place of the (big-endian)
high four bytes in the formerly 64-bit cur_addr field.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-5-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Paolo Bonzini
b68cb06093 m25p80: avoid out of bounds accesses
s->cur_addr can be made to point outside s->storage, either by
writing a value >= 128 to s->ear (because s->ear * MAX_3BYTES_SIZE
is a signed integer and sign-extends into the 64-bit cur_addr),
or just by writing an address beyond the size of the flash being
emulated.  Avoid the sign extension to make the code cleaner, and
on top of that mask s->cur_addr to s->size.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-4-git-send-email-clg@kaod.org
Reviewed by: Marcin Krzeminski <marcin.krzeminski@nokia.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Paolo Bonzini
cace7b801d m25p80: do not put iovec on the stack
When doing a read-modify-write cycle, QEMU uses the iovec after returning
from blk_aio_pwritev.  m25p80 puts the iovec on the stack of blk_aio_pwritev's
caller, which causes trouble in this case.  This has been a problem
since commit 243e6f6 ("m25p80: Switch to byte-based block access",
2016-05-12) started doing writes at a smaller granularity than 512 bytes.
In principle however it could have broken before when using -drive
if=mtd,cache=none on a disk with 4K native sectors.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-3-git-send-email-clg@kaod.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Cédric Le Goater
7673bb4cd3 ssi: change ssi_slave_init to be a realize ops
This enables qemu to handle late inits and report errors. All the SSI
slave routine names were changed accordingly. Code was modified to
handle errors when possible (m25p80 and ssi-sd)

Tested with the m25p80 slave object.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-id: 1467138270-32481-2-git-send-email-clg@kaod.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Peter Crosthwaite
f4b99537f1 xilinx_zynq: Connect devcfg to the Zynq machine model
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 85f39c9a13569b1113dacac3b952b0af54fc1260.1467053537.git.alistair.francis@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Alistair Francis
034c2e6902 dma: Add Xilinx Zynq devcfg device model
Add a minimal model for the devcfg device which is part of Zynq.
This model supports DMA capabilities and interrupt generation.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 83df49d8fa2d203a421ca71620809e4b04754e65.1467053537.git.alistair.francis@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Peter Crosthwaite
a74229597e register: Add block initialise helper
Add a helper that will scan a static RegisterAccessInfo Array
and populate a container MemoryRegion with registers as defined.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 347b810b2799e413c98d5bbeca97bcb1557946c3.1467053537.git.alistair.francis@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Peter Crosthwaite
49e14ddbce register: QOMify
QOMify registers as a child of TYPE_DEVICE. This allows registers to
define GPIOs.

Define an init helper that will do QOM initialisation.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: KONRAD Frederic <fred.konrad@greensocs.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 2545f71db26bf5586ca0c08a3e3cf1b217450552.1467053537.git.alistair.francis@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Peter Crosthwaite
684204593d register: Define REG and FIELD macros
Define some macros that can be used for defining registers and fields.

The REG32 macro will define A_FOO, for the byte address of a register
as well as R_FOO for the uint32_t[] register number (A_FOO / 4).

The FIELD macro will define FOO_BAR_MASK, FOO_BAR_SHIFT and
FOO_BAR_LENGTH constants for field BAR in register FOO.

Finally, there are some shorthand helpers for extracting/depositing
fields from registers based on these naming schemes.

Usage can greatly reduce the verbosity of device code.

The deposit and extract macros (eg FIELD_EX32, FIELD_DP32  etc.) can be
used to generate extract and deposits without any repetition of the name
stems.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: bbd87a3c03b1f173b1ed73a6d502c0196c18a72f.1467053537.git.alistair.francis@xilinx.com
[ EI Changes:
  * Add Deposit macros
]
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Alistair Francis
0b73c9bb06 register: Add Memory API glue
Add memory io handlers that glue the register API to the memory API.
Just translation functions at this stage. Although it does allow for
devices to be created without all-in-one mmio r/w handlers.

This patch also adds the RegisterInfoArray struct, which allows all of
the individual RegisterInfo structs to be grouped into a single memory
region.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: f7704d8ac6ac0f469ed35401f8151a38bd01468b.1467053537.git.alistair.francis@xilinx.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Alistair Francis
1599121b57 register: Add Register API
This API provides some encapsulation of registers and factors out some
common functionality to common code. Bits of device state (usually MMIO
registers) often have all sorts of access restrictions and semantics
associated with them. This API allows you to define what those
restrictions are on a bit-by-bit basis.

Helper functions are then used to access the register which observe the
semantics defined by the RegisterAccessInfo struct.

Some features:
Bits can be marked as read_only (ro field)
Bits can be marked as write-1-clear (w1c field)
Bits can be marked as reserved (rsvd field)
Reset values can be defined (reset)
Bits can be marked clear on read (cor)
Pre and post action callbacks can be added to read and write ops
Verbose debugging info can be enabled/disabled

Useful for defining device register spaces in a data driven way. Cuts
down on a lot of the verbosity and repetition in the switch-case blocks
in the standard foo_mmio_read/write functions.

Also useful for automated generation of device models from hardware
design sources.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 40d62c7e1bf6e63bb4193ec46b15092a7d981e59.1467053537.git.alistair.francis@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Alistair Francis
ae2923b5c2 bitops: Add MAKE_64BIT_MASK macro
Add a macro that creates a 64bit value which has length number of ones
shifted across by the value of shift.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 9773244aa1c8c26b8b82cb261d8f5dd4b7b9fcf9.1467053537.git.alistair.francis@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Ard Biesheuvel
5d636e21c4 hw/arm/virt: mark the PCIe host controller as DMA coherent in the DT
Since QEMU performs cacheable accesses to guest memory when doing DMA
as part of the implementation of emulated PCI devices, guest drivers
should use cacheable accesses as well when running under KVM. Since this
essentially means that emulated PCI devices are DMA coherent, set the
'dma-coherent' DT property on the PCIe host controller DT node.

This brings the DT description into line with the ACPI description,
which already marks the PCI bridge as cache coherent (see commit
bc64b96c98).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Message-id: 1467134090-5099-1-git-send-email-ard.biesheuvel@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Andrey Smirnov
a19861666b armv7m_nvic: Use qemu_get_cpu(0) instead of current_cpu
Starting QEMU with -S results in current_cpu containing its initial
value of NULL. It is however possible to connect to such QEMU instance
and query various CPU registers, one example being CPUID, and doing that
results in QEMU segfaulting.

Using qemu_get_cpu(0) seem reasonable enough given that ARMv7M
architecture is a single core architecture.

Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 13:15:22 +01:00
Peter Maydell
39e0b03dec memory: Assert that memory_region_init_rom_device() ops aren't NULL
It doesn't make sense to pass a NULL ops argument to
memory_region_init_rom_device(), because the effect will
be that if the guest tries to write to the memory region
then QEMU will segfault. Catch the bug earlier by sanity
checking the arguments to this function, and remove the
misleading documentation that suggests that passing NULL
might be sensible.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1467122287-24974-4-git-send-email-peter.maydell@linaro.org
2016-07-04 13:06:35 +01:00
Peter Maydell
a7aeb5f7b2 imx: Use memory_region_init_rom() for ROMs
The imx boards were all incorrectly creating ROMs using
memory_region_init_rom_device() with a NULL ops pointer. This
will cause QEMU to abort if the guest tries to write to the
ROM. Switch to the new memory_region_init_rom() instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1467122287-24974-3-git-send-email-peter.maydell@linaro.org
2016-07-04 13:06:35 +01:00
Peter Maydell
a1777f7f64 memory: Provide memory_region_init_rom()
Provide a new helper function memory_region_init_rom() for memory
regions which are read-only (and unlike those created by
memory_region_init_rom_device() don't have special behaviour
for writes). This has the same behaviour as calling
memory_region_init_ram() and then memory_region_set_readonly()
(which is what we do today in boards with pure ROMs) but is a
more easily discoverable API for the purpose.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1467122287-24974-2-git-send-email-peter.maydell@linaro.org
2016-07-04 13:06:35 +01:00
Peter Maydell
f5666418c4 target-arm/arm-semi.c: Fix SYS_HEAPINFO for 64-bit guests
SYS_HEAPINFO is one of the few semihosting calls which has to write
values back into a parameter block in memory.  When we added
support for 64-bit semihosting we updated the code which reads from
the parameter block to read 64-bit words but forgot to change the
code that writes back into the block. Update it to treat the
block as a set of words of the appropriate width for the guest.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1466783381-29506-3-git-send-email-peter.maydell@linaro.org
2016-07-04 13:06:35 +01:00
Peter Maydell
d317091d5e linux-user: Make semihosting heap/stack fields abi_ulongs
The fields in the TaskState heap_base, heap_limit and stack_base
are all guest addresses (representing the locations of the heap
and stack for the guest binary), so they should be abi_ulong
rather than uint32_t. (This only in practice affects ARM AArch64
since all the other semihosting implementations are 32-bit.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Message-id: 1466783381-29506-2-git-send-email-peter.maydell@linaro.org
2016-07-04 13:06:35 +01:00
Michael S. Tsirkin
6c6668232e Revert "virtio-net: unbreak self announcement and guest offloads after migration"
This reverts commit 1f8828ef57.

Cc: qemu-stable@nongnu.org
Reported-by: Robin Geuze <robing@transip.nl>
Tested-by: Robin Geuze <robing@transip.nl>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:52:10 +03:00
Michael S. Tsirkin
62cee1a28a virtio: set low features early on load
virtio migrates the low 32 feature bits twice, the first copy is there
for compatibility but ever since
019a3edbb2: ("virtio: make features 64bit
wide") it's ignored on load. This is wrong since virtio_net_load tests
self announcement and guest offloads before the second copy including
high feature bits is loaded.  This means that self announcement, control
vq and guest offloads are all broken after migration.

Fix it up by loading low feature bits: somewhat ugly since high and low
bits become out of sync temporarily, but seems unavoidable for
compatibility.  The right thing to do for new features is probably to
test the host features, anyway.

Fixes: 019a3edbb2
    ("virtio: make features 64bit wide")
Cc: qemu-stable@nongnu.org
Reported-by: Robin Geuze <robing@transip.nl>
Tested-by: Robin Geuze <robing@transip.nl>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:52:10 +03:00
Cornelia Huck
0830c96d70 virtio: revert host notifiers to old semantics
The host notifier rework tried both to unify host notifiers across
transports and plug a possible hole during host notifier
re-assignment. Unfortunately, this meant a change in semantics that
breaks vhost and iSCSI+dataplane.

As the minimal fix, keep the common host notifier code but revert
to the old semantics so that we have time to figure out the proper
fix.

Fixes: 6798e245a3 ("virtio-bus: common ioeventfd infrastructure")
Reported-by: Peter Lieven <pl@kamp.de>
Reported-by: Jason Wang <jasowang@redhat.com>
Reported-by: Marc-André Lureau <marcandre.lureau@gmail.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Tested-by: Jason Wang <jasowang@redhat.com>
Tested-by: Peter Lieven <pl@kamp.de>
2016-07-04 14:52:10 +03:00
Markus Armbruster
01c9742d9d pc: Eliminate PcPciInfo
PcPciInfo has two (ill-named) members: Range w32 is the PCI hole, and
w64 is the PCI64 hole.

Three users:

* I440FXState and MCHPCIState have a member PcPciInfo pci_info, but
  only pci_info.w32 is actually used.  This is confusing.  Replace by
  Range pci_hole.

* acpi_build() uses auto PcPciInfo pci_info to forward both PCI holes
  from acpi_get_pci_info() to build_dsdt().  Replace by two variables
  Range pci_hole, pci_hole64.  Rename acpi_get_pci_info() to
  acpi_get_pci_holes().

PcPciInfo is now unused; drop it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-07-04 14:52:10 +03:00
Markus Armbruster
97a83ec3a9 piix: Set I440FXState member pci_info.w32 in one place
Range pci_info.w32 records the location of the PCI hole.

It's initialized to empty when QOM zeroes I440FXState.  That's a fine
value for a still unknown PCI hole.

i440fx_init() sets pci_info.w32.begin = below_4g_mem_size.  Changes
the PCI hole from empty to [below_4g_mem_size, UINT64_MAX].  That's a
bogus value.

i440fx_pcihost_initfn() sets pci_info.end = IO_APIC_DEFAULT_ADDRESS.
Since i440fx_init() ran already, this changes the PCI hole to
[below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS-1].  That's the correct
value.

Setting the bounds of the PCI hole in two separate places is
confusing, and begs the question whether the bogus intermediate value
could be used by something, or what would happen if we somehow managed
to realize an i440FX device without having run the board init function
i440fx_init() first.

Avoid the confusion by setting the (constant) upper bound along with
the lower bound in i440fx_init().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
2016-07-04 14:50:59 +03:00
Marcel Apfelbaum
10d01f73e3 machine: remove iommu property
Since iommu devices can be created with '-device' there is
no need to keep iommu as machine and mch property.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:50:58 +03:00
Marcel Apfelbaum
621d983a1f hw/iommu: enable iommu with -device
Use the standard '-device intel-iommu' to create the IOMMU device.
The legacy '-machine,iommu=on' can still be used.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:50:58 +03:00
Marcel Apfelbaum
bf8d492405 q35: allow dynamic sysbus
Allow adding sysbus devices with -device on Q35.

At first Q35 will support only intel-iommu to be added this way,
however the command line will support all sysbus devices.

Mark with 'cannot_instantiate_with_device_add_yet' the ones
causing immediate problems (e.g. crashes).

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:50:01 +03:00
Marcel Apfelbaum
b86eacb804 hw/pci: delay bus_master_enable_region initialization
Skip bus_master_enable region creation on PCI device init
in order to be sure the IOMMU device (if present) would
be created in advance. Add this memory region at machine_done time.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:50:01 +03:00
Marcel Apfelbaum
1b04cc801a hw/ppc: realize the PCI root bus as part of mac99 init
Mac99's PCI root bus is not part of a host bridge,
realize it manually.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:50:01 +03:00
Gerd Hoffmann
5ec7d09818 xen: fix ram init regression
Commit "8156d48 pc: allow raising low memory via max-ram-below-4g
option" causes a regression on xen, because it uses a different
memory split.

This patch initializes max-ram-below-4g to zero and leaves the
initialization to the memory initialization functions.  That way
they can pick different default values (max-ram-below-4g is zero
still) or use the user supplied value (max-ram-below-4g is non-zero).

Also skip the whole ram split calculation on Xen.  xen_ram_init()
does its own split calculation anyway so it is superfluous, also
this way xen_ram_init can actually see whenever max-ram-below-4g
is zero or not.

Reported-by: Anthony PERARD <anthony.perard@citrix.com>
Tested-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-07-04 14:50:00 +03:00
Peter Maydell
e2c8f9e44e Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
slirp updates

# gpg: Signature made Sun 03 Jul 2016 23:03:04 BST
# gpg:                using RSA key 0xE3E51CE8FB6B2F1D
# gpg: Good signature from "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: F632 74CD C630 0873 CB3D  29D9 E3E5 1CE8 FB6B 2F1D

* remotes/thibault/tags/samuel-thibault:
  slirp: Add support for stateless DHCPv6
  slirp: Remove superfluous memset() calls from the TFTP code
  slirp: Add RDNSS advertisement
  slirp: Support link-local DNS addresses
  slirp: Add dns6 resolution
  slirp: Split get_dns_addr

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-07-04 10:49:17 +01:00
Daniel P. Berrange
0c16c056a4 crypto: switch hash code to use nettle/gcrypt directly
Currently the internal hash code is using the gnutls hash APIs.
GNUTLS in turn is wrapping either nettle or gcrypt. Not only
were the GNUTLS hash APIs not added until GNUTLS 2.9.10, but
they don't expose support for all the algorithms QEMU needs
to use with LUKS.

Address this by directly wrapping nettle/gcrypt in QEMU and
avoiding GNUTLS's extra layer of indirection. This gives us
support for hash functions on a much wider range of platforms
and opens up ability to support more hash functions. It also
avoids a GNUTLS bug which would not correctly handle hashing
of large data blocks if int != size_t.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-04 10:47:09 +01:00
Daniel P. Berrange
8cbfc94269 crypto: rename OUT to out in xts test to avoid clash on MinGW
On MinGW one of the system headers already has "OUT" defined
which causes a compile failure of the test suite. Rename the
test suite var to 'out' to avoid this clash

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-04 10:46:59 +01:00
Daniel P. Berrange
8b7cdba386 crypto: fix handling of iv generator hash defaults
When opening an existing LUKS volume, if the iv generator is
essiv, then the iv hash algorithm is mandatory to provide. We
must report an error if it is omitted in the cipher mode spec,
not silently default to hash 0 (md5).  If the iv generator is
not essiv, then we explicitly ignore any iv hash algorithm,
rather than report an error, for compatibility with dm-crypt.

When creating a new LUKS volume, if the iv generator is essiv
and no iv hsah algorithm is provided, we should default to
using the sha256 hash.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-07-04 10:46:59 +01:00
Thomas Huth
7b143999f2 slirp: Add support for stateless DHCPv6
Provide basic support for stateless DHCPv6 (see RFC 3736) so
that guests can also automatically boot via IPv6 with SLIRP
(for IPv6 network booting, see RFC 5970 for details).

Tested with:

    qemu-system-ppc64 -nographic -vga none -boot n -net nic \
        -net user,ipv6=yes,ipv4=no,tftp=/path/to/tftp,bootfile=ppc64.img

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2016-07-03 23:59:42 +02:00
Thomas Huth
e5857062a6 slirp: Remove superfluous memset() calls from the TFTP code
Commit fad7fb9ccd  ("Add IPv6 support to the TFTP code")
refactored some common code for preparing the mbuf into a new
function called tftp_prep_mbuf_data(). One part of this common
code is to do a "memset(m->m_data, 0, m->m_size);" for the related
buffer first. However, at two spots, the memset() was not removed
from the calling function, so it currently done twice in these code
paths. Thus let's delete these superfluous memsets in the calling
functions now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2016-07-03 23:59:42 +02:00
Samuel Thibault
f7725df387 slirp: Add RDNSS advertisement
This adds the RDNSS option to IPv6 router advertisements, so that the guest
can autoconfigure the DNS server address.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>

---
Changes since last submission:
- Disable on windows, until we have support for it
2016-07-03 23:31:12 +02:00
Samuel Thibault
ef763fa4bd slirp: Support link-local DNS addresses
They look like fe80::%eth0

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>

---
Changes since last submission:
- fix windows build
2016-07-03 23:29:13 +02:00
Samuel Thibault
1d17654e76 slirp: Add dns6 resolution
This makes get_dns_addr address family-agnostic, thus allowing to add the
IPv6 case.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-07-03 23:27:08 +02:00
Samuel Thibault
972487b878 slirp: Split get_dns_addr
Separate get_dns_addr into get_dns_addr_cached and get_dns_addr_resolv_conf
to make conversion to IPv6 easier.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
2016-07-03 23:24:54 +02:00
Gerd Hoffmann
8df42d855c build: add pc-bios to config-host.mak deps
... so configure re-runs on pc-bios updates such as new pxe roms.
Needed because configure symlinks the prebuilt roms from src
into build tree.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-01 13:31:44 +02:00
Gerd Hoffmann
45027808cd ipxe: add new roms to BLOBS
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-07-01 13:26:57 +02:00
Gerd Hoffmann
c52125ab92 ipxe: update prebuilt binaries
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-06-24 14:18:19 +02:00
Gerd Hoffmann
43716de6b3 vmxnet3: add boot rom
Disable for old machine types as this is a guest visible change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-06-24 14:11:36 +02:00
Gerd Hoffmann
1676103dc2 e1000e: add boot rom
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-06-24 13:56:36 +02:00
Gerd Hoffmann
4d9dc8b7a8 ipxe: add vmxnet3 rom
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-06-24 13:53:57 +02:00
Gerd Hoffmann
c9c3dc5f4b ipxe: add e1000e rom
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-06-24 13:53:52 +02:00
Gerd Hoffmann
ffdc5a2bce ipxe: update submodule from 4e03af8ec to 041863191
shortlog
========

Andrew Widdersheim (1):
      [netdevice] Add "ifname" setting

Carl Henrik Lunde (1):
      [vmxnet3] Avoid completely filling the TX descriptor ring

Christian Hesse (2):
      [golan] Fix build error on some versions of gcc
      [ath9k] Fix buffer overrun for ar9287

Christian Nilsson (2):
      [intel] Add PCI device ID for another I219-V
      [intel] Add PCI device ID for another I219-LM

Hummel Frank (1):
      [intel] Add INTEL_NO_PHY_RST for I218-LM

Kyösti Mälkki (1):
      [intel] Add PCI IDs for i210/i211 flashless operation

Ladi Prosek (6):
      [pci] Add pci_find_next_capability()
      [virtio] Add virtio 1.0 constants and data structures
      [virtio] Add virtio 1.0 PCI support
      [virtio] Add virtio-net 1.0 support
      [virtio] Renumber virtio_pci_region flags
      [virtio] Fix virtio-pci logging

Leendert van Doorn (2):
      [tg3] Fix address truncation bug on 64-bit machines
      [tg3] Add missing memory barrier

Michael Brown (287):
      [settings] Re-add "uristring" setting type
      [dhcp] Do not skip ProxyDHCPREQUEST if next-server is empty
      [efi] Add definitions of GUIDs observed when booting shim.efi and grub.efi
      [efi] Mark EFI debug transcription functions as __attribute__ (( pure ))
      [efi] Remove raw EFI_HANDLE values from debug messages
      [efi] Include installed protocol list in unknown handle names
      [efi] Improve efi_wrap debugging
      [pxe] Construct all fake DHCP packets before starting PXE NBP
      [efi] Add definitions of GUIDs observed when booting wdsmgfw.efi
      [efi] Fix debug directory size
      [efi] Populate debug directory entry FileOffset field
      [build] Search for ldlinux.c32 separately from isolinux.bin
      [tcpip] Allow supported address families to be detected at runtime
      [efi] Allow calls to efi_snp_claim() and efi_snp_release() to be nested
      [efi] Fix order of events on SNP removal path
      [efi] Do not return EFI_NOT_READY from our ReceiveFilters() method
      [pxe] Populate ciaddr in fake PXE Boot Server ACK packet
      [uri] Generalise tftp_uri() to pxe_uri()
      [efi] Implement the EFI_PXE_BASE_CODE_PROTOCOL
      [usb] Expose usb_find_driver()
      [usb] Add function to device's function list before attempting probe
      [efi] Add USB headers and GUID definitions
      [efi] Allow efidev_parent() to traverse multiple device generations
      [efi] Add a USB host controller driver based on EFI_USB_IO_PROTOCOL
      [tcpip] Avoid generating positive zero for transmitted UDP checksums
      [usb] Generalise zero-length packet generation logic
      [ehci] Do not treat zero-length NULL pointers as unreachable
      [ehci] Support arbitrarily large transfers
      [xhci] Support arbitrarily large transfers
      [efi] Provide efi_devpath_len()
      [efi] Include a copy of the device path within struct efi_device
      [usb] Select preferred USB device configuration based on driver score
      [usb] Allow for wildcard USB class IDs
      [efi] Expose unused USB devices via EFI_USB_IO_PROTOCOL
      [ncm] Support setting MAC address
      [build] Remove dependency on libiberty
      [efi] Minimise use of iPXE header files when building host utilities
      [pxe] Invoke INT 1a,564e when PXE stack is activated
      [pxe] Notify BIOS via INT 1a,564e for each new network device
      [efi] Work around broken 32-bit PE executable parsing in ImageHlp.dll
      [efi] Avoid infinite loops when asked to stop non-existent devices
      [efi] Expose an UNDI interface alongside the existing SNP interface
      [malloc] Avoid integer overflow for excessively large memory allocations
      [peerdist] Avoid NULL pointer dereference for plaintext blocks
      [http] Verify server port when reusing a pooled connection
      [efi] Reset root directory when installing EFI_SIMPLE_FILE_SYSTEM_PROTOCOL
      [efi] Update to current EDK2 headers
      [efi] Import EFI_HII_FONT_PROTOCOL definitions
      [fbcon] Allow character height to be selected at runtime
      [fbcon] Move margin calculations to fbcon.c
      [console] Tidy up config/console.h
      [build] Generalise CONSOLE_VESAFB to CONSOLE_FRAMEBUFFER
      [efi] Add support for EFI_GRAPHICS_OUTPUT_PROTOCOL frame buffer consoles
      [dhcp] Reset start time when deferring discovery
      [dhcp] Limit maximum number of DHCP discovery deferrals
      [comboot] Reset console before starting COMBOOT executable
      [intel] Forcibly skip PHY reset on some models
      [intel] Correct definition of receive overrun bit
      [infiniband] Add definitions for FDR and EDR link speeds
      [infiniband] Add qword accessors for ib_guid and ib_gid
      [pci] Add definitions for PCI Express function level reset (FLR)
      [bitops] Fix definitions for big-endian devices
      [smsc95xx] Add driver for SMSC/Microchip LAN95xx USB Ethernet NICs
      [bitops] Provide BIT_QWORD_PTR()
      [efi] Add %.usb target for building EFI-bootable USB (or other) disk images
      [usb] Use port->disconnected to check for disconnected devices
      [usb] Record USB device speed separately from current port speed
      [usb] Allow USB device IDs to include arbitrary driver-specific data
      [usb] Allow additional settling time for out-of-spec hubs
      [acm] Add support for CDC-ACM (aka USB RNDIS) devices
      [xhci] Ensure that zero-length packets are not part of a TRB chain
      [efi] Centralise EFI file system info GUIDs
      [build] Allow extra objects to be included in an all-drivers build
      [bios] Add support for injecting keypresses
      [settings] Expose SMBIOS settings as global variables
      [smsc95xx] Allow for multiple methods for obtaining the MAC address
      [crypto] Dual-license selected DRBG files
      [smsc95xx] Fetch MAC from SMBIOS OEM string for Honeywell VM3
      [crypto] Dual-license more selected DRBG files
      [vmware] Expose GuestRPC mechanism in 64-bit builds
      [romprefix] Report an optimistic runtime size estimate
      [usb] Add support for numeric keypad on USB keyboards
      [http] Handle relative redirection URIs
      [image] Provide image_set_uri() to modify an image's URI
      [downloader] Update image URI in response to a redirection
      [tftp] Do not change current working URI when TFTP server is cleared
      [infiniband] Profile post work queue entry operations
      [pxe] Colourise debug output
      [pxe] Add debug message to display real-mode segment addresses
      [i386] Add check_bios_interrupts() debug function
      [debug] Allow debug colourisation to be disabled
      [stp] Fix incorrectly disambiguated errors
      [build] Add named configuration for public cloud environments
      [smsc95xx] Enable LEDs
      [usb] Allow USB endpoints to specify a reserved header length for refills
      [smsc95xx] Reserve headroom in received packets
      [autoboot] Fix incorrect boolean logic
      [uri] Avoid potentially large stack allocation
      [ocsp] Avoid including a double path separator in request URI
      [tftp] Mangle initial slash on TFTP URIs
      [uri] Apply URI decoding for all parsed URIs
      [tcp] Guard against malformed TCP options
      [slam] Avoid potential division by zero
      [ath9k] Remove broken ath_rxbuf_alloc()
      [ehci] Add extra debugging information
      [malloc] Guard against unsigned integer overflow
      [iobuf] Improve robustness of I/O buffer allocation
      [pxe] Clarify comments regarding shrinking of cached DHCP packet
      [efi] Add missing definitions for function key scancodes
      [prefix] Pad .text16 and .data16 segment sizes at build time
      [libc] Split rmsetjmp() and rmlongjmp() into a separate rmsetjmp.h
      [bios] Use intptr_t when casting .text16 function pointers
      [bios] Use size_t when casting _text16_memsz and _data16_memsz
      [bios] Allow relocate.c to be compiled for x86_64
      [bios] Allow rtc_entropy.c to be compiled for x86_64
      [bios] Allow bzimage.c to be compiled for x86_64
      [bios] Allow bios_console.c to be compiled for x86_64
      [bios] Allow memmap.c to be compiled for x86_64
      [bios] Allow librm to be compiled for x86_64
      [bios] Move isolinux definitions to Makefile.pcbios
      [bios] Add bin-x86_64-pcbios build platform
      [librm] Discard argument as part of return from prot_call()
      [librm] Discard argument as part of return from real_call()
      [prefix] Align INT 15,88 temporary decompression area to a page boundary
      [romprefix] Align PMM temporary decompression area to a page boundary
      [bios] Make uses of REAL_CODE() and PHYS_CODE() 64-bit clean
      [librm] Use garbage-collectable section names
      [bios] Use an 8kB stack for x86_64
      [prefix] Use garbage-collectable section names
      [librm] Simplify definitions for prot_call() and real_call() stack frames
      [prefix] Standardise calls to prot_call()
      [librm] Convert prot_call() to a real-mode near call
      [librm] Provide an abstraction wrapper for prot_call
      [librm] Transition to protected mode within init_librm()
      [relocate] Preserve page alignment during relocation
      [librm] Prepare for long-mode memory map
      [librm] Generate page tables for 64-bit builds
      [build] Fix building on older versions of binutils
      [librm] Add phys_call() wrapper for calling code with physical addressing
      [librm] Do not preserve flags unnecessarily
      [librm] Mark virt_offset, text16, data16, rm_cs, and rm_ds as constant
      [librm] Support userptr_t in 64-bit builds
      [librm] Rename prot_call() to virt_call()
      [librm] Add support for running in 64-bit long mode
      [ioapi] Split ioremap() out to a separate IOMAP API
      [librm] Support ioremap() for addresses above 4GB in a 64-bit build
      [netdevice] Refuse to create duplicate network device names
      [infiniband] Remove concept of whole-device owner data
      [infiniband] Avoid multiple calls to ib_cmrc_shutdown()
      [infiniband] Add support for performing service record lookups
      [infiniband] Assign names to Infiniband devices for debug messages
      [infiniband] Use "%#lx" as format specifier for queue pair numbers
      [infiniband] Use "%d" as format specifier for LIDs
      [infiniband] Use connection's local ID as debug message identifier
      [infiniband] Use correct transaction identifier in CM responses
      [infiniband] Do not use GRH for local paths
      [infiniband] Record multicast GID attachment as part of group membership
      [infiniband] Parse MLID, rate, and SL from multicast membership record
      [ipoib] Avoid unnecessary path record lookup for broadcast address
      [ipoib] Simplify test for received broadcast packets
      [infiniband] Allow for the creation of multicast groups
      [pcbios] Restrict external memory allocations to the low 4GB
      [infiniband] Assign names to CMRC connections
      [infiniband] Assign names to queue pairs
      [infiniband] Add "ibstat" command
      [infiniband] Retrieve GID flag from cached path entries
      [ipoib] Resimplify test for received broadcast packets
      [ipoib] Increase number of transmit work queue entries
      [ifmgmt] Include human-readable error message for configuration failure
      [infiniband] Make IPoIB support configurable at build time
      [eoib] Add Ethernet over Infiniband (EoIB) driver
      [eoib] Silently ignore EoIB heartbeat packets
      [eoib] Allow the multicast group to be forcefully created
      [eoib] Support non-FullMember gateway devices
      [xsigo] Add support for Xsigo virtual Ethernet (XVE) EoIB devices
      [efi] Work around broken GetFontInfo() implementations
      [tls] Avoid potential out-of-bound reads in length fields
      [crypto] Allow for zero-length ASN.1 cursors
      [pixbuf] Check for unsigned integer overflow on multiplication
      [arp] Validate length of ARP packet
      [librm] Do not unconditionally preserve flags across virt_call()
      [linda] Use standard readq() and writeq() implementations
      [qib7322] Use standard readq() and writeq() implementations
      [test] Add missing #include <string.h>
      [serial] Add missing #include <string.h>
      [3c595] Fix compilation when "char" is unsigned by default
      [tg3] Remove x86-specific inline assembly
      [efi] Centralise architecture-independent EFI Makefile and linker script
      [build] Allow assembler section type character to vary by architecture
      [build] Accept CROSS= as a synonym for CROSS_COMPILE=
      [efi] Update to current EDK2 headers
      [efi] Add processor binding headers for ARM and AArch64
      [uri] Support URIs containing only scheme and path components
      [uri] Support "file:" URIs describing relative paths
      [efi] Provide access to files stored on EFI filesystems
      [build] Remove long-obsolete header file
      [pseudobit] Rename bitops.h to pseudobit.h
      [bitops] Add generic atomic bit test, set, and clear functions
      [hyperv] Use generic set_bit() function
      [xen] Use generic test_and_clear_bit() function
      [test] Move i386-specific tests to arch/i386/tests
      [efi] Move architecture-independent EFI prefixes to interface/efi
      [libc] Allow container_of() to be used on volatile pointers
      [ipoib] Allow external code to identify IPoIB network devices
      [hermon] Add missing iounmap()
      [arbel] Add missing iounmap()
      [linda] Add missing iounmap()
      [qib7322] Add missing iounmap()
      [crypto] Allow trusted certificates to be stored in non-volatile options
      [hermon] Allocate space for GRH on UD queue pairs
      [arbel] Allocate space for GRH on UD queue pairs
      [infiniband] Allow drivers to override the eIPoIB LEMAC
      [build] Do not use "objcopy -O binary" for objects with relocation records
      [gdb] Add support for x86_64
      [int13] Allow drive to be hooked using the natural drive number
      [int13] Allow default drive to be specified via "san-drive" setting
      [3c5x9] Avoid use of sleep() in driver code
      [etherfabric] Avoid use of sleep() in driver code
      [hermon] Fix received packet length
      [arbel] Fix received packet length
      [libc] Make sleep() interruptible
      [pxe] Implicitly open network device in PXENV_UDP_OPEN
      [prefix] Use CRC32 to verify each block prior to decompression
      [crypto] Allow cross-certificate source to be configured at build time
      [iscsi] Include DHCP server address in iBFT
      [netdevice] Return ENOENT for an unknown bus type
      [linda] Validate payload length
      [qib7322] Validate payload length
      [test] Update snprintf_ok() to use okx()
      [libc] Print "<NULL>" for wide-character NULL strings
      [efi] Work around broken EFI HII specification
      [comboot] Support COMBOOT in 64-bit builds
      [ethernet] Make LACP support configurable at build time
      [libc] Allow CPU architectures to use unoptimised string functions
      [libgcc] Provide symbol to handle gcc's implicit calls to memset()
      [image] Skip misleading "format not recognised" error message
      [librm] Reduce real-mode stack consumption in virt_call()
      [tg3] Fix _tg3_flag() for 64-bit builds
      [librm] Preserve FPU, MMX and SSE state across calls to virt_call()
      [efi] Eliminate use of libbfd
      [build] Remove unnecessary dependency on zlib
      [tcpip] Do not fall back to using unoptimised TCP/IP checksumming
      [efi] Use a timer event to generate the currticks() timer
      [efi] Generalise EFI entropy generation to non-x86 CPUs
      [sis190] Fix building with GCC 6
      [skge] Fix building with GCC 6
      [golan] Fix building with GCC 6
      [ath] Fix building with GCC 6
      [legacy] Fix building with GCC 6
      [libgcc] Provide __divmoddi4()
      [bitops] Fix typo in test case
      [arm] Add support for 32-bit ARM
      [arm] Avoid instruction references to symbols defined via ".equ"
      [arm] Split out 32-bit-specific code to arch/arm32
      [arm] Add support for 64-bit ARM (Aarch64)
      [efi] Allow for building with older versions of elf.h system header
      [libc] Avoid implicit assumptions about potentially-optimised memcpy()
      [arm] Add optimised string functions for 64-bit ARM
      [arm] Add optimised TCP/IP checksumming for 64-bit ARM
      [efi] Guard against GetStatus() failing to return a NULL TX buffer
      [arm] Use CNTVCT_EL0 as profiling timestamp
      [undi] Work around broken HP EliteBook 745 G3 PXE ROM
      [pci] Add support for PCI Enhanced Allocation
      [settings] Extend numerical setting tags to "unsigned long"
      [netdevice] Fix failure path in register_netdev()
      [lotest] Add option to use broadcast packets for loopback testing
      [http] Ignore unrecognised "Connection" header tokens
      [efi] Work around broken UEFI keyboard drivers
      [axge] Add driver for ASIX 10/100/1000 USB Ethernet NICs
      [arm] Use correct DHCP client architecture values
      [dhcp] Fix definitions for x86_64 and EFI BC client architectures
      [efi] Expose DHCP packets via the Apple NetBoot protocol
      [libc] Always use a non-zero seed for the (non-crypto) RNG
      [pci] Support systems with multiple PCI root bridges
      [http] Accept headers with no whitespace following the colon
      [tcp] Send TCP keepalives on idle established connections
      [time] Allow system clock to be adjusted at runtime
      [ntp] Add simple NTP client
      [cmdline] Add "ntp" command
      [thunderx] Add driver for Cavium ThunderX SoC NICs
      [thunderx] Fix channel configuration for VNICs 1-7
      [efi] Include VLAN in SNP device path if applicable
      [thunderx] Retrieve base MAC address via EFI_THUNDER_CONFIG_PROTOCOL
      [smsc75xx] Allow up to 100ms for reset to complete
      [efi] Report failures to stop the EFI timer tick event
      [efi] Do not copy garbage bytes into SNP device path MAC address
      [thunderx] Fix compilation with older versions of gcc

Mika Tiainen (1):
      [intel] Add INTEL_NO_PHY_RST for another I218-LM variant

Suresh Sundriyal (1):
      [pool] Fix check for reopenable pooled connections

Torgeir Wulfsberg (1):
      [intel] Add INTEL_NO_PHY_RST for I217-LM

Vinson Lee (2):
      [mucurses] Fix GCC 6 nonnull-compare errors
      [build] Remove nested "my" declaration

Wissam Shoukair (3):
      [golan] Add Connect-IB, ConnectX-4 and ConnectX-4 Lx (Infiniband) support
      [mlx_icmd] Fix compilation error in GCC versions newer than 4.6.4
      [golan] Add missing iounmap()

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-06-24 13:41:56 +02:00
1357 changed files with 39509 additions and 16754 deletions

4
.gitignore vendored
View File

@@ -95,6 +95,10 @@
/pc-bios/optionrom/linuxboot.bin
/pc-bios/optionrom/linuxboot.raw
/pc-bios/optionrom/linuxboot.img
/pc-bios/optionrom/linuxboot_dma.asm
/pc-bios/optionrom/linuxboot_dma.bin
/pc-bios/optionrom/linuxboot_dma.raw
/pc-bios/optionrom/linuxboot_dma.img
/pc-bios/optionrom/multiboot.asm
/pc-bios/optionrom/multiboot.bin
/pc-bios/optionrom/multiboot.raw

View File

@@ -34,10 +34,13 @@ addons:
- sparse
- uuid-dev
# The channel name "irc.oftc.net#qemu" is encrypted against qemu/qemu
# to prevent IRC notifications from forks. This was created using:
# $ travis encrypt -r "qemu/qemu" "irc.oftc.net#qemu"
notifications:
irc:
channels:
- "irc.oftc.net#qemu"
- secure: "F7GDRgjuOo5IUyRLqSkmDL7kvdU4UcH3Lm/W2db2JnDHTGCqgEdaYEYKciyCLZ57vOTsTsOgesN8iUT7hNHBd1KWKjZe9KDTZWppWRYVwAwQMzVeSOsbbU4tRoJ6Pp+3qhH1Z0eGYR9ZgKYAoTumDFgSAYRp4IscKS8jkoedOqM="
on_success: change
on_failure: always
env:

View File

@@ -31,7 +31,11 @@ Do not leave whitespace dangling off the ends of lines.
2. Line width
Lines are 80 characters; not longer.
Lines should be 80 characters; try not to make them longer.
Sometimes it is hard to do, especially when dealing with QEMU subsystems
that use long function or symbol names. Even in that case, do not make
lines much longer than 80 characters.
Rationale:
- Some people like to tile their 24" screens with a 6x4 matrix of 80x24
@@ -39,6 +43,8 @@ Rationale:
let them keep doing it.
- Code and especially patches is much more readable if limited to a sane
line length. Eighty is traditional.
- The four-space indentation makes the most common excuse ("But look
at all that white space on the left!") moot.
- It is the QEMU coding style.
3. Naming

View File

@@ -158,6 +158,10 @@ painful. These are:
* you may assume that right shift of a signed integer duplicates
the sign bit (ie it is an arithmetic shift, not a logical shift)
In addition, QEMU assumes that the compiler does not use the latitude
given in C99 and C11 to treat aspects of signed '<<' as undefined, as
documented in the GNU Compiler Collection manual starting at version 4.0.
7. Error handling and reporting
7.1 Reporting errors to the human user

View File

@@ -449,23 +449,23 @@ S: Maintained
F: hw/*/versatile*
Xilinx Zynq
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
M: Alistair Francis <alistair.francis@xilinx.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/xilinx_zynq.c
F: hw/misc/zynq_slcr.c
F: hw/*/xilinx_*
F: hw/*/cadence_*
F: hw/ssi/xilinx_spips.c
F: hw/misc/zynq_slcr.c
F: include/hw/xilinx.h
X: hw/ssi/xilinx_*
Xilinx ZynqMP
M: Alistair Francis <alistair.francis@xilinx.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/xlnx-zynqmp.c
F: hw/arm/xlnx-ep108.c
F: include/hw/arm/xlnx-zynqmp.h
F: hw/*/xlnx*.c
F: include/hw/*/xlnx*.c
ARM ACPI Subsystem
M: Shannon Zhao <zhaoshenglong@huawei.com>
@@ -948,14 +948,6 @@ S: Supported
F: hw/scsi/megasas.c
F: hw/scsi/mfi.h
Xilinx EDK
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
M: Alistair Francis <alistair.francis@xilinx.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/*/xilinx_*
F: include/hw/xilinx.h
Network packet abstractions
M: Dmitry Fleytman <dmitry@daynix.com>
S: Maintained
@@ -971,7 +963,6 @@ F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi*
Rocker
M: Scott Feldman <sfeldma@gmail.com>
M: Jiri Pirko <jiri@resnulli.us>
S: Maintained
F: hw/net/rocker/
@@ -1027,6 +1018,7 @@ F: async.c
F: aio-*.c
F: block/io.c
F: migration/block*
F: include/block/aio.h
T: git git://github.com/stefanha/qemu.git block
Block Jobs
@@ -1177,6 +1169,13 @@ F: numa.c
F: include/sysemu/numa.h
T: git git://github.com/ehabkost/qemu.git numa
Host Memory Backends
M: Eduardo Habkost <ehabkost@redhat.com>
M: Igor Mammedov <imammedo@redhat.com>
S: Maintained
F: backends/hostmem*.c
F: include/sysemu/hostmem.h
QAPI
M: Markus Armbruster <armbru@redhat.com>
M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1243,6 +1242,12 @@ F: docs/*qmp-*
F: scripts/qmp/
T: git git://repo.or.cz/qemu/armbru.git qapi-next
Register API
M: Alistair Francis <alistair.francis@xilinx.com>
S: Maintained
F: hw/core/register.c
F: include/hw/register.h
SLIRP
M: Samuel Thibault <samuel.thibault@ens-lyon.org>
M: Jan Kiszka <jan.kiszka@siemens.com>

View File

@@ -30,7 +30,7 @@ CONFIG_ALL=y
-include config-all-devices.mak
-include config-all-disas.mak
config-host.mak: $(SRC_PATH)/configure
config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios
@echo $@ is out-of-date, running configure
@# TODO: The next lines include code which supports a smooth
@# transition from old configurations without config.status.
@@ -185,7 +185,7 @@ qemu-version.h: FORCE
printf '""\n'; \
fi; \
fi) > $@.tmp)
$(call quiet-command, cmp --quiet $@ $@.tmp || mv $@.tmp $@)
$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)
config-host.h: config-host.h-timestamp
config-host.h-timestamp: config-host.mak
@@ -225,8 +225,9 @@ dtc/%:
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
# Only keep -O and -g cflags
romsubdir-%:
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/",)
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/" CFLAGS="$(filter -O% -g%,$(CFLAGS))",)
ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))
@@ -408,8 +409,7 @@ common de-ch es fo fr-ca hu ja mk nl-be pt sl tr \
bepo cz
ifdef INSTALL_BLOBS
BLOBS=bios.bin bios-256k.bin bios-fast.bin \
sgabios.bin vgabios.bin vgabios-cirrus.bin \
BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
acpi-dsdt.aml \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
@@ -417,9 +417,10 @@ pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
efi-e1000e.rom efi-vmxnet3.rom \
qemu-icon.bmp qemu_logo_no_text.svg \
bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
multiboot.bin linuxboot.bin kvmvapic.bin \
multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
s390-ccw.img \
spapr-rtas.bin slof.bin \
palcode-clipper \

View File

@@ -153,6 +153,7 @@ trace-events-y += hw/alpha/trace-events
trace-events-y += ui/trace-events
trace-events-y += audio/trace-events
trace-events-y += net/trace-events
trace-events-y += target-i386/trace-events
trace-events-y += target-sparc/trace-events
trace-events-y += target-s390x/trace-events
trace-events-y += target-ppc/trace-events

View File

@@ -212,7 +212,7 @@ hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool
qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@")
clean:
clean: clean-target
rm -f *.a *~ $(PROGS)
rm -f $(shell find . -name '*.[od]')
rm -f hmp-commands.h qmp-commands-old.h gdbstub-xml.c

View File

@@ -1 +1 @@
2.6.50
2.7.50

View File

@@ -485,12 +485,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
return progress;
}
void aio_context_setup(AioContext *ctx, Error **errp)
void aio_context_setup(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
assert(!ctx->epollfd);
ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
if (ctx->epollfd == -1) {
fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
ctx->epoll_available = false;
} else {
ctx->epoll_available = true;

View File

@@ -371,6 +371,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
return progress;
}
void aio_context_setup(AioContext *ctx, Error **errp)
void aio_context_setup(AioContext *ctx)
{
}

33
async.c
View File

@@ -29,6 +29,7 @@
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
#include "block/raw-aio.h"
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
@@ -217,7 +218,7 @@ aio_ctx_check(GSource *source)
for (bh = ctx->first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
return true;
}
}
}
return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}
@@ -242,6 +243,14 @@ aio_ctx_finalize(GSource *source)
qemu_bh_delete(ctx->notify_dummy_bh);
thread_pool_free(ctx->thread_pool);
#ifdef CONFIG_LINUX_AIO
if (ctx->linux_aio) {
laio_detach_aio_context(ctx->linux_aio, ctx);
laio_cleanup(ctx->linux_aio);
ctx->linux_aio = NULL;
}
#endif
qemu_mutex_lock(&ctx->bh_lock);
while (ctx->first_bh) {
QEMUBH *next = ctx->first_bh->next;
@@ -282,6 +291,17 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
return ctx->thread_pool;
}
#ifdef CONFIG_LINUX_AIO
LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
if (!ctx->linux_aio) {
ctx->linux_aio = laio_init();
laio_attach_aio_context(ctx->linux_aio, ctx);
}
return ctx->linux_aio;
}
#endif
void aio_notify(AioContext *ctx)
{
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
@@ -327,14 +347,10 @@ AioContext *aio_context_new(Error **errp)
{
int ret;
AioContext *ctx;
Error *local_err = NULL;
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
aio_context_setup(ctx, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto fail;
}
aio_context_setup(ctx);
ret = event_notifier_init(&ctx->notifier, false);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to initialize event notifier");
@@ -345,6 +361,9 @@ AioContext *aio_context_new(Error **errp)
false,
(EventNotifierHandler *)
event_notifier_dummy_cb);
#ifdef CONFIG_LINUX_AIO
ctx->linux_aio = NULL;
#endif
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);

View File

@@ -1739,13 +1739,21 @@ static void audio_vm_change_state_handler (void *opaque, int running,
audio_reset_timer (s);
}
static void audio_atexit (void)
static bool is_cleaning_up;
bool audio_is_cleaning_up(void)
{
return is_cleaning_up;
}
void audio_cleanup(void)
{
AudioState *s = &glob_audio_state;
HWVoiceOut *hwo = NULL;
HWVoiceIn *hwi = NULL;
HWVoiceOut *hwo, *hwon;
HWVoiceIn *hwi, *hwin;
while ((hwo = audio_pcm_hw_find_any_out (hwo))) {
is_cleaning_up = true;
QLIST_FOREACH_SAFE(hwo, &glob_audio_state.hw_head_out, entries, hwon) {
SWVoiceCap *sc;
if (hwo->enabled) {
@@ -1761,17 +1769,20 @@ static void audio_atexit (void)
cb->ops.destroy (cb->opaque);
}
}
QLIST_REMOVE(hwo, entries);
}
while ((hwi = audio_pcm_hw_find_any_in (hwi))) {
QLIST_FOREACH_SAFE(hwi, &glob_audio_state.hw_head_in, entries, hwin) {
if (hwi->enabled) {
hwi->pcm_ops->ctl_in (hwi, VOICE_DISABLE);
}
hwi->pcm_ops->fini_in (hwi);
QLIST_REMOVE(hwi, entries);
}
if (s->drv) {
s->drv->fini (s->drv_opaque);
s->drv = NULL;
}
}
@@ -1799,7 +1810,7 @@ static void audio_init (void)
QLIST_INIT (&s->hw_head_out);
QLIST_INIT (&s->hw_head_in);
QLIST_INIT (&s->cap_head);
atexit (audio_atexit);
atexit(audio_cleanup);
s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);
@@ -1966,8 +1977,7 @@ CaptureVoiceOut *AUD_add_capture (
QLIST_INSERT_HEAD (&s->cap_head, cap, entries);
QLIST_INSERT_HEAD (&cap->cb_head, cb, entries);
hw = NULL;
while ((hw = audio_pcm_hw_find_any_out (hw))) {
QLIST_FOREACH(hw, &glob_audio_state.hw_head_out, entries) {
audio_attach_capture (hw);
}
return cap;

View File

@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef QEMU_AUDIO_H
#define QEMU_AUDIO_H
@@ -162,4 +163,7 @@ static inline void *advance (void *p, int incr)
int wav_start_capture (CaptureState *s, const char *path, int freq,
int bits, int nchannels);
#endif /* audio.h */
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
#endif /* QEMU_AUDIO_H */

View File

@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef QEMU_AUDIO_INT_H
#define QEMU_AUDIO_INT_H
@@ -257,4 +258,4 @@ static inline int audio_ring_dist (int dst, int src, int len)
#define AUDIO_FUNC __FILE__ ":" AUDIO_STRINGIFY (__LINE__)
#endif
#endif /* audio_int.h */
#endif /* QEMU_AUDIO_INT_H */

View File

@@ -19,4 +19,4 @@ int audio_pt_wait (struct audio_pt *, const char *);
int audio_pt_unlock_and_signal (struct audio_pt *, const char *);
int audio_pt_join (struct audio_pt *, void **, const char *);
#endif /* audio_pt_int.h */
#endif /* QEMU_AUDIO_PT_INT_H */

View File

@@ -36,8 +36,6 @@
#define MAC_OS_X_VERSION_10_6 1060
#endif
static int isAtexit;
typedef struct {
int buffer_frames;
int nbuffers;
@@ -378,11 +376,6 @@ static inline UInt32 isPlaying (AudioDeviceID outputDeviceID)
return result;
}
static void coreaudio_atexit (void)
{
isAtexit = 1;
}
static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name)
{
int err;
@@ -630,7 +623,7 @@ static void coreaudio_fini_out (HWVoiceOut *hw)
int err;
coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw;
if (!isAtexit) {
if (!audio_is_cleaning_up()) {
/* stop playback */
if (isPlaying(core->outputDeviceID)) {
status = AudioDeviceStop(core->outputDeviceID, core->ioprocid);
@@ -673,7 +666,7 @@ static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)
case VOICE_DISABLE:
/* stop playback */
if (!isAtexit) {
if (!audio_is_cleaning_up()) {
if (isPlaying(core->outputDeviceID)) {
status = AudioDeviceStop(core->outputDeviceID,
core->ioprocid);
@@ -697,7 +690,6 @@ static void *coreaudio_audio_init (void)
CoreaudioConf *conf = g_malloc(sizeof(CoreaudioConf));
*conf = glob_conf;
atexit(coreaudio_atexit);
return conf;
}

View File

@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef QEMU_MIXENG_H
#define QEMU_MIXENG_H
@@ -48,4 +49,4 @@ void st_rate_stop (void *opaque);
void mixeng_clear (struct st_sample *buf, int len);
void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol);
#endif /* mixeng.h */
#endif /* QEMU_MIXENG_H */

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# audio/alsaaudio.c
alsa_revents(int revents) "revents = %d"

View File

@@ -64,6 +64,14 @@ out:
error_propagate(errp, local_err);
}
static uint16List **host_memory_append_node(uint16List **node,
unsigned long value)
{
*node = g_malloc0(sizeof(**node));
(*node)->value = value;
return &(*node)->next;
}
static void
host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
@@ -74,13 +82,12 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
unsigned long value;
value = find_first_bit(backend->host_nodes, MAX_NODES);
if (value == MAX_NODES) {
return;
}
*node = g_malloc0(sizeof(**node));
(*node)->value = value;
node = &(*node)->next;
node = host_memory_append_node(node, value);
if (value == MAX_NODES) {
goto out;
}
do {
value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
@@ -88,11 +95,10 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
break;
}
*node = g_malloc0(sizeof(**node));
(*node)->value = value;
node = &(*node)->next;
node = host_memory_append_node(node, value);
} while (true);
out:
visit_type_uint16List(v, name, &host_nodes, errp);
}
@@ -197,6 +203,7 @@ static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
static void host_memory_backend_set_prealloc(Object *obj, bool value,
Error **errp)
{
Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
if (backend->force_prealloc) {
@@ -217,7 +224,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
os_mem_prealloc(fd, ptr, sz);
os_mem_prealloc(fd, ptr, sz, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
backend->prealloc = true;
}
}
@@ -258,6 +269,16 @@ host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
return memory_region_size(&backend->mr) ? &backend->mr : NULL;
}
void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
backend->is_mapped = mapped;
}
bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
return backend->is_mapped;
}
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
@@ -270,8 +291,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
if (bc->alloc) {
bc->alloc(backend, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
goto out;
}
ptr = memory_region_get_ram_ptr(&backend->mr);
@@ -327,18 +347,21 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
* specified NUMA policy in place.
*/
if (backend->prealloc) {
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
&local_err);
if (local_err) {
goto out;
}
}
}
out:
error_propagate(errp, local_err);
}
static bool
host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
{
MemoryRegion *mr;
mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp);
if (memory_region_is_mapped(mr)) {
if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
return false;
} else {
return true;

View File

@@ -25,16 +25,51 @@
#include "qemu-common.h"
#include "sysemu/char.h"
#include "ui/console.h"
#include "ui/input.h"
#define MSMOUSE_LO6(n) ((n) & 0x3f)
#define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)
static void msmouse_event(void *opaque,
int dx, int dy, int dz, int buttons_state)
{
CharDriverState *chr = (CharDriverState *)opaque;
typedef struct {
CharDriverState *chr;
QemuInputHandlerState *hs;
int axis[INPUT_AXIS__MAX];
bool btns[INPUT_BUTTON__MAX];
bool btnc[INPUT_BUTTON__MAX];
uint8_t outbuf[32];
int outlen;
} MouseState;
static void msmouse_chr_accept_input(CharDriverState *chr)
{
MouseState *mouse = chr->opaque;
int len;
len = qemu_chr_be_can_write(chr);
if (len > mouse->outlen) {
len = mouse->outlen;
}
if (!len) {
return;
}
qemu_chr_be_write(chr, mouse->outbuf, len);
mouse->outlen -= len;
if (mouse->outlen) {
memmove(mouse->outbuf, mouse->outbuf + len, mouse->outlen);
}
}
static void msmouse_queue_event(MouseState *mouse)
{
unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
int dx, dy, count = 3;
dx = mouse->axis[INPUT_AXIS_X];
mouse->axis[INPUT_AXIS_X] = 0;
dy = mouse->axis[INPUT_AXIS_Y];
mouse->axis[INPUT_AXIS_Y] = 0;
/* Movement deltas */
bytes[0] |= (MSMOUSE_HI2(dy) << 2) | MSMOUSE_HI2(dx);
@@ -42,14 +77,54 @@ static void msmouse_event(void *opaque,
bytes[2] |= MSMOUSE_LO6(dy);
/* Buttons */
bytes[0] |= (buttons_state & 0x01 ? 0x20 : 0x00);
bytes[0] |= (buttons_state & 0x02 ? 0x10 : 0x00);
bytes[3] |= (buttons_state & 0x04 ? 0x20 : 0x00);
bytes[0] |= (mouse->btns[INPUT_BUTTON_LEFT] ? 0x20 : 0x00);
bytes[0] |= (mouse->btns[INPUT_BUTTON_RIGHT] ? 0x10 : 0x00);
if (mouse->btns[INPUT_BUTTON_MIDDLE] ||
mouse->btnc[INPUT_BUTTON_MIDDLE]) {
bytes[3] |= (mouse->btns[INPUT_BUTTON_MIDDLE] ? 0x20 : 0x00);
mouse->btnc[INPUT_BUTTON_MIDDLE] = false;
count = 4;
}
/* We always send the packet of, so that we do not have to keep track
of previous state of the middle button. This can potentially confuse
some very old drivers for two button mice though. */
qemu_chr_be_write(chr, bytes, 4);
if (mouse->outlen <= sizeof(mouse->outbuf) - count) {
memcpy(mouse->outbuf + mouse->outlen, bytes, count);
mouse->outlen += count;
} else {
/* queue full -> drop event */
}
}
static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
InputEvent *evt)
{
MouseState *mouse = (MouseState *)dev;
InputMoveEvent *move;
InputBtnEvent *btn;
switch (evt->type) {
case INPUT_EVENT_KIND_REL:
move = evt->u.rel.data;
mouse->axis[move->axis] += move->value;
break;
case INPUT_EVENT_KIND_BTN:
btn = evt->u.btn.data;
mouse->btns[btn->button] = btn->down;
mouse->btnc[btn->button] = true;
break;
default:
/* keep gcc happy */
break;
}
}
static void msmouse_input_sync(DeviceState *dev)
{
MouseState *mouse = (MouseState *)dev;
msmouse_queue_event(mouse);
msmouse_chr_accept_input(mouse->chr);
}
static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
@@ -60,26 +135,41 @@ static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int
static void msmouse_chr_close (struct CharDriverState *chr)
{
g_free (chr);
MouseState *mouse = chr->opaque;
qemu_input_handler_unregister(mouse->hs);
g_free(mouse);
g_free(chr);
}
static QemuInputHandler msmouse_handler = {
.name = "QEMU Microsoft Mouse",
.mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL,
.event = msmouse_input_event,
.sync = msmouse_input_sync,
};
static CharDriverState *qemu_chr_open_msmouse(const char *id,
ChardevBackend *backend,
ChardevReturn *ret,
Error **errp)
{
ChardevCommon *common = backend->u.msmouse.data;
MouseState *mouse;
CharDriverState *chr;
chr = qemu_chr_alloc(common, errp);
if (!chr) {
return NULL;
}
chr->chr_write = msmouse_chr_write;
chr->chr_close = msmouse_chr_close;
chr->chr_accept_input = msmouse_chr_accept_input;
chr->explicit_be_open = true;
qemu_add_mouse_event_handler(msmouse_event, chr, 0, "QEMU Microsoft Mouse");
mouse = g_new0(MouseState, 1);
mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
&msmouse_handler);
mouse->chr = chr;
chr->opaque = mouse;
return chr;
}

153
block.c
View File

@@ -25,6 +25,7 @@
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/nbd.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qapi/qmp/qerror.h"
@@ -234,6 +235,8 @@ BlockDriverState *bdrv_new(void)
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
qemu_co_queue_init(&bs->flush_queue);
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
return bs;
@@ -329,8 +332,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
/* Fast-path if already in coroutine context */
bdrv_create_co_entry(&cco);
} else {
co = qemu_coroutine_create(bdrv_create_co_entry);
qemu_coroutine_enter(co, &cco);
co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
qemu_coroutine_enter(co);
while (cco.ret == NOT_DONE) {
aio_poll(qemu_get_aio_context(), true);
}
@@ -536,9 +539,10 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
return drv;
}
static int find_image_format(BlockDriverState *bs, const char *filename,
static int find_image_format(BdrvChild *file, const char *filename,
BlockDriver **pdrv, Error **errp)
{
BlockDriverState *bs = file->bs;
BlockDriver *drv;
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
@@ -549,7 +553,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
return ret;
}
ret = bdrv_pread(bs, 0, buf, sizeof(buf));
ret = bdrv_pread(file, 0, buf, sizeof(buf));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read image for determining its "
"format");
@@ -937,7 +941,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
goto fail_opts;
}
bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
@@ -1017,7 +1020,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->request_alignment) || bdrv_is_sg(bs));
assert(is_power_of_2(bs->bl.request_alignment));
qemu_opts_del(opts);
return 0;
@@ -1653,7 +1656,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
/* Image format probing */
bs->probed = !drv;
if (!drv && file) {
ret = find_image_format(file->bs, filename, &drv, &local_err);
ret = find_image_format(file, filename, &drv, &local_err);
if (ret < 0) {
goto fail;
}
@@ -2184,9 +2187,9 @@ static void bdrv_close(BlockDriverState *bs)
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
bs->encrypted = 0;
bs->valid_key = 0;
bs->sg = 0;
bs->encrypted = false;
bs->valid_key = false;
bs->sg = false;
QDECREF(bs->options);
QDECREF(bs->explicit_options);
bs->options = NULL;
@@ -2204,6 +2207,7 @@ static void bdrv_close(BlockDriverState *bs)
void bdrv_close_all(void)
{
block_job_cancel_sync_all();
nbd_export_close_all();
/* Drop references from requests still in flight, such as canceled block
* jobs whose AIO context has not been polled yet */
@@ -2323,116 +2327,6 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
return bs->drv->bdrv_check(bs, res, fix);
}
#define COMMIT_BUF_SECTORS 2048
/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
if (!drv)
return -ENOMEDIUM;
if (!bs->backing) {
return -ENOTSUP;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
return -EBUSY;
}
ro = bs->backing->bs->read_only;
open_flags = bs->backing->bs->open_flags;
if (ro) {
if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
return -EACCES;
}
}
length = bdrv_getlength(bs);
if (length < 0) {
ret = length;
goto ro_cleanup;
}
backing_length = bdrv_getlength(bs->backing->bs);
if (backing_length < 0) {
ret = backing_length;
goto ro_cleanup;
}
/* If our top snapshot is larger than the backing file image,
* grow the backing file image if possible. If not possible,
* we must return an error */
if (length > backing_length) {
ret = bdrv_truncate(bs->backing->bs, length);
if (ret < 0) {
goto ro_cleanup;
}
}
total_sectors = length >> BDRV_SECTOR_BITS;
/* qemu_try_blockalign() for bs will choose an alignment that works for
* bs->backing->bs as well, so no need to compare the alignment manually. */
buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
if (buf == NULL) {
ret = -ENOMEM;
goto ro_cleanup;
}
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
if (ret < 0) {
goto ro_cleanup;
}
if (ret) {
ret = bdrv_read(bs, sector, buf, n);
if (ret < 0) {
goto ro_cleanup;
}
ret = bdrv_write(bs->backing->bs, sector, buf, n);
if (ret < 0) {
goto ro_cleanup;
}
}
}
if (drv->bdrv_make_empty) {
ret = drv->bdrv_make_empty(bs);
if (ret < 0) {
goto ro_cleanup;
}
bdrv_flush(bs);
}
/*
* Make sure all data we wrote to the backing device is actually
* stable on disk.
*/
if (bs->backing) {
bdrv_flush(bs->backing->bs);
}
ret = 0;
ro_cleanup:
qemu_vfree(buf);
if (ro) {
/* ignoring error return here */
bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
}
return ret;
}
/*
* Return values:
* 0 - success
@@ -2582,6 +2476,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
bdrv_dirty_bitmap_truncate(bs);
bdrv_parent_cb_resize(bs);
++bs->write_gen;
}
return ret;
}
@@ -2644,30 +2539,30 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
int bdrv_is_read_only(BlockDriverState *bs)
bool bdrv_is_read_only(BlockDriverState *bs)
{
return bs->read_only;
}
int bdrv_is_sg(BlockDriverState *bs)
bool bdrv_is_sg(BlockDriverState *bs)
{
return bs->sg;
}
int bdrv_is_encrypted(BlockDriverState *bs)
bool bdrv_is_encrypted(BlockDriverState *bs)
{
if (bs->backing && bs->backing->bs->encrypted) {
return 1;
return true;
}
return bs->encrypted;
}
int bdrv_key_required(BlockDriverState *bs)
bool bdrv_key_required(BlockDriverState *bs)
{
BdrvChild *backing = bs->backing;
if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
return 1;
return true;
}
return (bs->encrypted && !bs->valid_key);
}
@@ -2689,10 +2584,10 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
}
ret = bs->drv->bdrv_set_key(bs, key);
if (ret < 0) {
bs->valid_key = 0;
bs->valid_key = false;
} else if (!bs->valid_key) {
/* call the change callback now, we skipped it on open */
bs->valid_key = 1;
bs->valid_key = true;
bdrv_parent_cb_change_media(bs, true);
}
return ret;
@@ -2944,7 +2839,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
BlockDriverInfo bdi;
if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
if (!(bs->open_flags & BDRV_O_UNMAP)) {
return false;
}

View File

@@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-obj-y += null.o mirror.o io.o
block-obj-y += null.o mirror.o commit.o io.o
block-obj-y += throttle-groups.o
block-obj-y += nbd.o nbd-client.o sheepdog.o
@@ -26,7 +26,6 @@ block-obj-y += write-threshold.o
block-obj-y += crypto.o
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += backup.o
iscsi.o-cflags := $(LIBISCSI_CFLAGS)

View File

@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
uint64_t sectors_read;
unsigned long *done_bitmap;
int64_t cluster_size;
bool compress;
NotifierWithReturn before_write;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
@@ -154,7 +155,8 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
ret = blk_co_pwritev(job->target, start * job->cluster_size,
bounce_qiov.size, &bounce_qiov, 0);
bounce_qiov.size, &bounce_qiov,
job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
@@ -474,9 +476,10 @@ static void coroutine_fn backup_run(void *opaque)
block_job_defer_to_main_loop(&job->common, backup_complete, data);
}
void backup_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode sync_mode,
BdrvDirtyBitmap *sync_bitmap,
void backup_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
bool compress,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
@@ -507,6 +510,12 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
return;
}
if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
error_setg(errp, "Compression is not supported for this drive %s",
bdrv_get_device_name(target));
return;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
return;
}
@@ -541,7 +550,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
goto error;
}
job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp);
job = block_job_create(job_id, &backup_job_driver, bs, speed,
cb, opaque, errp);
if (!job) {
goto error;
}
@@ -554,6 +564,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
job->sync_mode = sync_mode;
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
job->compress = compress;
/* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for
@@ -575,9 +586,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
bdrv_op_block_all(target, job->common.blocker);
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
job->common.co = qemu_coroutine_create(backup_run, job);
block_job_txn_add_job(txn, &job->common);
qemu_coroutine_enter(job->common.co, job);
qemu_coroutine_enter(job->common.co);
return;
error:

View File

@@ -37,6 +37,10 @@
typedef struct BDRVBlkdebugState {
int state;
int new_state;
int align;
/* For blkdebug_refresh_filename() */
char *config_file;
QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
@@ -350,7 +354,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *config;
uint64_t align;
int ret;
@@ -363,8 +366,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Read rules from config file or command line options */
config = qemu_opt_get(opts, "config");
ret = read_config(s, config, options, errp);
s->config_file = g_strdup(qemu_opt_get(opts, "config"));
ret = read_config(s, s->config_file, options, errp);
if (ret) {
goto out;
}
@@ -382,10 +385,10 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Set request alignment */
align = qemu_opt_get_size(opts, "align", bs->request_alignment);
if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
bs->request_alignment = align;
} else {
align = qemu_opt_get_size(opts, "align", 0);
if (align < INT_MAX && is_power_of_2(align)) {
s->align = align;
} else if (align) {
error_setg(errp, "Invalid alignment");
ret = -EINVAL;
goto fail_unref;
@@ -397,6 +400,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
fail_unref:
bdrv_unref_child(bs, bs->file);
out:
if (ret < 0) {
g_free(s->config_file);
}
qemu_opts_del(opts);
return ret;
}
@@ -456,7 +462,7 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
cb, opaque);
}
@@ -479,7 +485,7 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
cb, opaque);
}
@@ -514,6 +520,8 @@ static void blkdebug_close(BlockDriverState *bs)
remove_rule(rule);
}
}
g_free(s->config_file);
}
static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
@@ -620,7 +628,7 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
if (!strcmp(r->tag, tag)) {
qemu_coroutine_enter(r->co, NULL);
qemu_coroutine_enter(r->co);
return 0;
}
}
@@ -646,7 +654,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
}
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
if (!strcmp(r->tag, tag)) {
qemu_coroutine_enter(r->co, NULL);
qemu_coroutine_enter(r->co);
ret = 0;
}
}
@@ -678,6 +686,7 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkdebugState *s = bs->opaque;
QDict *opts;
const QDictEntry *e;
bool force_json = false;
@@ -699,8 +708,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s",
qdict_get_try_str(options, "config") ?: "",
"blkdebug:%s:%s", s->config_file ?: "",
bs->file->bs->exact_filename);
}
@@ -720,6 +728,15 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
bs->full_open_options = opts;
}
static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVBlkdebugState *s = bs->opaque;
if (s->align) {
bs->bl.request_alignment = s->align;
}
}
static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
@@ -738,6 +755,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
.bdrv_refresh_limits = blkdebug_refresh_limits,
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,

View File

@@ -65,7 +65,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
static void blkreplay_bh_cb(void *opaque)
{
Request *req = opaque;
qemu_coroutine_enter(req->co, NULL);
qemu_coroutine_enter(req->co);
qemu_bh_delete(req->bh);
g_free(req);
}
@@ -81,22 +81,22 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
replay_block_event(req->bh, reqid);
}
static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
uint64_t reqid = request_id++;
int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
return ret;
}
static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
uint64_t reqid = request_id++;
int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -107,18 +107,18 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
uint64_t reqid = request_id++;
int ret = bdrv_co_pwrite_zeroes(bs->file->bs, offset, count, flags);
int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
return ret;
}
static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
uint64_t reqid = request_id++;
int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -144,11 +144,11 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_close = blkreplay_close,
.bdrv_getlength = blkreplay_getlength,
.bdrv_co_readv = blkreplay_co_readv,
.bdrv_co_writev = blkreplay_co_writev,
.bdrv_co_preadv = blkreplay_co_preadv,
.bdrv_co_pwritev = blkreplay_co_pwritev,
.bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes,
.bdrv_co_discard = blkreplay_co_discard,
.bdrv_co_pdiscard = blkreplay_co_pdiscard,
.bdrv_co_flush = blkreplay_co_flush,
};

View File

@@ -247,9 +247,9 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -262,9 +262,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
nb_sectors, cb, opaque);
bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}

View File

@@ -409,6 +409,22 @@ bool bdrv_has_blk(BlockDriverState *bs)
return bdrv_first_blk(bs) != NULL;
}
/*
* Returns true if @bs has only BlockBackends as parents.
*/
bool bdrv_is_root_node(BlockDriverState *bs)
{
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->role != &child_root) {
return false;
}
}
return true;
}
/*
* Return @blk's DriveInfo if any, else null.
*/
@@ -727,21 +743,6 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return 0;
}
static int blk_check_request(BlockBackend *blk, int64_t sector_num,
int nb_sectors)
{
if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
return -EIO;
}
if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
return -EIO;
}
return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE);
}
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
@@ -760,7 +761,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
throttle_group_co_io_limits_intercept(blk, bytes, false);
}
return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
}
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
@@ -785,7 +786,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
flags |= BDRV_REQ_FUA;
}
return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
}
typedef struct BlkRwCo {
@@ -836,8 +837,8 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
co = qemu_coroutine_create(co_entry);
qemu_coroutine_enter(co, &rwco);
co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
aio_context = blk_get_aio_context(blk);
while (rwco.ret == NOT_DONE) {
@@ -870,6 +871,11 @@ int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
flags | BDRV_REQ_ZERO_WRITE);
}
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
return bdrv_make_zero(blk->root, flags);
}
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
@@ -945,8 +951,8 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
acb->bh = NULL;
acb->has_returned = false;
co = qemu_coroutine_create(co_entry);
qemu_coroutine_enter(co, acb);
co = qemu_coroutine_create(co_entry, acb);
qemu_coroutine_enter(co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
@@ -1060,16 +1066,16 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk,
return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
BlockAIOCB *blk_aio_discard(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return blk_abort_aio_request(blk, cb, opaque, ret);
}
return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -1101,14 +1107,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}
int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
return bdrv_co_pdiscard(blk_bs(blk), offset, count);
}
int blk_co_flush(BlockBackend *blk)
@@ -1168,6 +1174,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
return BLOCK_ERROR_ACTION_REPORT;
case BLOCKDEV_ON_ERROR_IGNORE:
return BLOCK_ERROR_ACTION_IGNORE;
case BLOCKDEV_ON_ERROR_AUTO:
default:
abort();
}
@@ -1303,15 +1310,16 @@ int blk_get_flags(BlockBackend *blk)
}
}
int blk_get_max_transfer_length(BlockBackend *blk)
/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint32_t max = 0;
if (bs) {
return bs->bl.max_transfer_length;
} else {
return 0;
max = bs->bl.max_transfer;
}
return MIN_NON_ZERO(max, INT_MAX);
}
int blk_get_max_iov(BlockBackend *blk)
@@ -1477,15 +1485,11 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
flags | BDRV_REQ_ZERO_WRITE);
}
int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int count)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
return ret;
}
return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
BDRV_REQ_WRITE_COMPRESSED);
}
int blk_truncate(BlockBackend *blk, int64_t offset)
@@ -1497,14 +1501,14 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return bdrv_truncate(blk_bs(blk), offset);
}
int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
return bdrv_pdiscard(blk_bs(blk), offset, count);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,

View File

@@ -104,10 +104,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
struct bochs_header bochs;
int ret;
bs->read_only = 1; // no write support yet
bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
bs->read_only = true; /* no write support yet */
ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
if (ret < 0) {
return ret;
}
@@ -141,7 +140,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);
if (ret < 0) {
goto fail;
@@ -189,6 +188,11 @@ fail:
return ret;
}
static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
}
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
@@ -210,7 +214,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
&bitmap_entry, 1);
if (ret < 0) {
return ret;
@@ -251,7 +255,7 @@ bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_concat(&local_qiov, qiov, bytes_done, 512);
if (block_offset > 0) {
ret = bdrv_co_preadv(bs->file->bs, block_offset, 512,
ret = bdrv_co_preadv(bs->file, block_offset, 512,
&local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -283,6 +287,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
.bdrv_refresh_limits = bochs_refresh_limits,
.bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,
};

View File

@@ -66,11 +66,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
bs->read_only = 1;
bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
bs->read_only = true;
/* read header */
ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
if (ret < 0) {
return ret;
}
@@ -96,7 +95,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
return ret;
}
@@ -127,7 +126,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
goto fail;
}
@@ -199,6 +198,11 @@ fail:
return ret;
}
static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
}
static inline int cloop_read_block(BlockDriverState *bs, int block_num)
{
BDRVCloopState *s = bs->opaque;
@@ -207,7 +211,7 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
int ret;
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
ret = bdrv_pread(bs->file, s->offsets[block_num],
s->compressed_block, bytes);
if (ret != bytes) {
return -1;
@@ -280,6 +284,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
.bdrv_refresh_limits = cloop_refresh_limits,
.bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,
};

View File

@@ -113,6 +113,7 @@ static void coroutine_fn commit_run(void *opaque)
CommitBlockJob *s = opaque;
CommitCompleteData *data;
int64_t sector_num, end;
uint64_t delay_ns = 0;
int ret = 0;
int n = 0;
void *buf = NULL;
@@ -142,10 +143,8 @@ static void coroutine_fn commit_run(void *opaque)
buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
for (sector_num = 0; sector_num < end; sector_num += n) {
uint64_t delay_ns = 0;
bool copy;
wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -161,19 +160,13 @@ wait:
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
goto wait;
}
}
ret = commit_populate(s->top, s->base, sector_num, n, buf);
bytes_written += n * BDRV_SECTOR_SIZE;
}
if (ret < 0) {
if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
s->on_error == BLOCKDEV_ON_ERROR_REPORT||
(s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
BlockErrorAction action =
block_job_error_action(&s->common, false, s->on_error, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT) {
goto out;
} else {
n = 0;
@@ -182,6 +175,10 @@ wait:
}
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
if (copy && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
}
}
ret = 0;
@@ -211,8 +208,8 @@ static const BlockJobDriver commit_job_driver = {
.set_speed = commit_set_speed,
};
void commit_start(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *top, int64_t speed,
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, BlockCompletionFunc *cb,
void *opaque, const char *backing_file_str, Error **errp)
{
@@ -236,7 +233,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
s = block_job_create(job_id, &commit_job_driver, bs, speed,
cb, opaque, errp);
if (!s) {
return;
}
@@ -277,8 +275,129 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
s->common.co = qemu_coroutine_create(commit_run);
s->common.co = qemu_coroutine_create(commit_run, s);
trace_commit_start(bs, base, top, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
qemu_coroutine_enter(s->common.co);
}
#define COMMIT_BUF_SECTORS 2048
/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
BlockBackend *src, *backing;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
if (!drv)
return -ENOMEDIUM;
if (!bs->backing) {
return -ENOTSUP;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
return -EBUSY;
}
ro = bs->backing->bs->read_only;
open_flags = bs->backing->bs->open_flags;
if (ro) {
if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
return -EACCES;
}
}
src = blk_new();
blk_insert_bs(src, bs);
backing = blk_new();
blk_insert_bs(backing, bs->backing->bs);
length = blk_getlength(src);
if (length < 0) {
ret = length;
goto ro_cleanup;
}
backing_length = blk_getlength(backing);
if (backing_length < 0) {
ret = backing_length;
goto ro_cleanup;
}
/* If our top snapshot is larger than the backing file image,
* grow the backing file image if possible. If not possible,
* we must return an error */
if (length > backing_length) {
ret = blk_truncate(backing, length);
if (ret < 0) {
goto ro_cleanup;
}
}
total_sectors = length >> BDRV_SECTOR_BITS;
/* blk_try_blockalign() for src will choose an alignment that works for
* backing as well, so no need to compare the alignment manually. */
buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
if (buf == NULL) {
ret = -ENOMEM;
goto ro_cleanup;
}
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
if (ret < 0) {
goto ro_cleanup;
}
if (ret) {
ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto ro_cleanup;
}
ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE, 0);
if (ret < 0) {
goto ro_cleanup;
}
}
}
if (drv->bdrv_make_empty) {
ret = drv->bdrv_make_empty(bs);
if (ret < 0) {
goto ro_cleanup;
}
blk_flush(src);
}
/*
* Make sure all data we wrote to the backing device is actually
* stable on disk.
*/
blk_flush(backing);
ret = 0;
ro_cleanup:
qemu_vfree(buf);
blk_unref(src);
blk_unref(backing);
if (ro) {
/* ignoring error return here */
bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
}
return ret;
}

View File

@@ -64,7 +64,7 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
BlockDriverState *bs = opaque;
ssize_t ret;
ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
ret = bdrv_pread(bs->file, offset, buf, buflen);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read encryption header");
return ret;
@@ -193,17 +193,16 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
QemuOpts *opts,
Error **errp)
{
OptsVisitor *ov;
Visitor *v;
QCryptoBlockOpenOptions *ret = NULL;
Error *local_err = NULL;
ret = g_new0(QCryptoBlockOpenOptions, 1);
ret->format = format;
ov = opts_visitor_new(opts);
v = opts_visitor_new(opts);
visit_start_struct(opts_get_visitor(ov),
NULL, NULL, 0, &local_err);
visit_start_struct(v, NULL, NULL, 0, &local_err);
if (local_err) {
goto out;
}
@@ -211,7 +210,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
switch (format) {
case Q_CRYPTO_BLOCK_FORMAT_LUKS:
visit_type_QCryptoBlockOptionsLUKS_members(
opts_get_visitor(ov), &ret->u.luks, &local_err);
v, &ret->u.luks, &local_err);
break;
default:
@@ -219,10 +218,10 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
break;
}
if (!local_err) {
visit_check_struct(opts_get_visitor(ov), &local_err);
visit_check_struct(v, &local_err);
}
visit_end_struct(opts_get_visitor(ov));
visit_end_struct(v, NULL);
out:
if (local_err) {
@@ -230,7 +229,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
qapi_free_QCryptoBlockOpenOptions(ret);
ret = NULL;
}
opts_visitor_cleanup(ov);
visit_free(v);
return ret;
}
@@ -240,17 +239,16 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
QemuOpts *opts,
Error **errp)
{
OptsVisitor *ov;
Visitor *v;
QCryptoBlockCreateOptions *ret = NULL;
Error *local_err = NULL;
ret = g_new0(QCryptoBlockCreateOptions, 1);
ret->format = format;
ov = opts_visitor_new(opts);
v = opts_visitor_new(opts);
visit_start_struct(opts_get_visitor(ov),
NULL, NULL, 0, &local_err);
visit_start_struct(v, NULL, NULL, 0, &local_err);
if (local_err) {
goto out;
}
@@ -258,7 +256,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
switch (format) {
case Q_CRYPTO_BLOCK_FORMAT_LUKS:
visit_type_QCryptoBlockCreateOptionsLUKS_members(
opts_get_visitor(ov), &ret->u.luks, &local_err);
v, &ret->u.luks, &local_err);
break;
default:
@@ -266,10 +264,10 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
break;
}
if (!local_err) {
visit_check_struct(opts_get_visitor(ov), &local_err);
visit_check_struct(v, &local_err);
}
visit_end_struct(opts_get_visitor(ov));
visit_end_struct(v, NULL);
out:
if (local_err) {
@@ -277,7 +275,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
qapi_free_QCryptoBlockCreateOptions(ret);
ret = NULL;
}
opts_visitor_cleanup(ov);
visit_free(v);
return ret;
}
@@ -322,8 +320,8 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
goto cleanup;
}
bs->encrypted = 1;
bs->valid_key = 1;
bs->encrypted = true;
bs->valid_key = true;
ret = 0;
cleanup:
@@ -428,7 +426,7 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
ret = bdrv_co_readv(bs->file->bs,
ret = bdrv_co_readv(bs->file,
payload_offset + sector_num,
cur_nr_sectors, &hd_qiov);
if (ret < 0) {
@@ -507,7 +505,7 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
ret = bdrv_co_writev(bs->file->bs,
ret = bdrv_co_writev(bs->file,
payload_offset + sector_num,
cur_nr_sectors, &hd_qiov);
if (ret < 0) {
@@ -565,6 +563,53 @@ static int block_crypto_create_luks(const char *filename,
filename, opts, errp);
}
static int block_crypto_get_info_luks(BlockDriverState *bs,
BlockDriverInfo *bdi)
{
BlockDriverInfo subbdi;
int ret;
ret = bdrv_get_info(bs->file->bs, &subbdi);
if (ret != 0) {
return ret;
}
bdi->unallocated_blocks_are_zero = false;
bdi->can_write_zeroes_with_unmap = false;
bdi->cluster_size = subbdi.cluster_size;
return 0;
}
static ImageInfoSpecific *
block_crypto_get_specific_info_luks(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
ImageInfoSpecific *spec_info;
QCryptoBlockInfo *info;
info = qcrypto_block_get_info(crypto->block, NULL);
if (!info) {
return NULL;
}
if (info->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
qapi_free_QCryptoBlockInfo(info);
return NULL;
}
spec_info = g_new(ImageInfoSpecific, 1);
spec_info->type = IMAGE_INFO_SPECIFIC_KIND_LUKS;
spec_info->u.luks.data = g_new(QCryptoBlockInfoLUKS, 1);
*spec_info->u.luks.data = info->u.luks;
/* Blank out pointers we've just stolen to avoid double free */
memset(&info->u.luks, 0, sizeof(info->u.luks));
qapi_free_QCryptoBlockInfo(info);
return spec_info;
}
BlockDriver bdrv_crypto_luks = {
.format_name = "luks",
.instance_size = sizeof(BlockCrypto),
@@ -578,6 +623,8 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_co_readv = block_crypto_co_readv,
.bdrv_co_writev = block_crypto_co_writev,
.bdrv_getlength = block_crypto_getlength,
.bdrv_get_info = block_crypto_get_info_luks,
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
};
static void block_crypto_init(void)

View File

@@ -169,7 +169,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
state->sock_fd = fd;
s = state->s;
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
switch (action) {
case CURL_POLL_IN:
aio_set_fd_handler(s->aio_context, fd, false,

View File

@@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int nr_sectors)
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
@@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {

View File

@@ -86,7 +86,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
uint64_t buffer;
int ret;
ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
ret = bdrv_pread(bs->file, offset, &buffer, 8);
if (ret < 0) {
return ret;
}
@@ -100,7 +100,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
uint32_t buffer;
int ret;
ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
ret = bdrv_pread(bs->file, offset, &buffer, 4);
if (ret < 0) {
return ret;
}
@@ -153,8 +153,9 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
}
}
static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp)
{
BlockDriverState *file_bs = file->bs;
int64_t length;
int64_t offset = 0;
uint8_t buffer[515];
@@ -178,7 +179,7 @@ static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
offset = length - 511 - 512;
}
length = length < 515 ? length : 515;
ret = bdrv_pread(file_bs, offset, buffer, length);
ret = bdrv_pread(file, offset, buffer, length);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
return ret;
@@ -355,7 +356,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
offset += 4;
buffer = g_realloc(buffer, count);
ret = bdrv_pread(bs->file->bs, offset, buffer, count);
ret = bdrv_pread(bs->file, offset, buffer, count);
if (ret < 0) {
goto fail;
}
@@ -392,7 +393,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
buffer = g_malloc(info_length + 1);
buffer[info_length] = '\0';
ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
if (ret != info_length) {
ret = -EINVAL;
goto fail;
@@ -438,8 +439,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
int64_t offset;
int ret;
bs->read_only = 1;
bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
bs->read_only = true;
s->n_chunks = 0;
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -449,7 +449,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
ds.max_sectors_per_chunk = 1;
/* locate the UDIF trailer */
offset = dmg_find_koly_offset(bs->file->bs, errp);
offset = dmg_find_koly_offset(bs->file, errp);
if (offset < 0) {
ret = offset;
goto fail;
@@ -547,6 +547,11 @@ fail:
return ret;
}
static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
}
static inline int is_sector_in_chunk(BDRVDMGState* s,
uint32_t chunk_num, uint64_t sector_num)
{
@@ -595,7 +600,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -619,7 +624,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000006: /* bzip2 compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -644,7 +649,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
break;
#endif /* CONFIG_BZIP2 */
case 1: /* copy */
ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -720,6 +725,7 @@ static BlockDriver bdrv_dmg = {
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_refresh_limits = dmg_refresh_limits,
.bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};

View File

@@ -11,7 +11,27 @@
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#define GLUSTER_OPT_FILENAME "filename"
#define GLUSTER_OPT_VOLUME "volume"
#define GLUSTER_OPT_PATH "path"
#define GLUSTER_OPT_TYPE "type"
#define GLUSTER_OPT_SERVER_PATTERN "server."
#define GLUSTER_OPT_HOST "host"
#define GLUSTER_OPT_PORT "port"
#define GLUSTER_OPT_TO "to"
#define GLUSTER_OPT_IPV4 "ipv4"
#define GLUSTER_OPT_IPV6 "ipv6"
#define GLUSTER_OPT_SOCKET "socket"
#define GLUSTER_OPT_DEBUG "debug"
#define GLUSTER_DEFAULT_PORT 24007
#define GLUSTER_DEBUG_DEFAULT 4
#define GLUSTER_DEBUG_MAX 9
#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
typedef struct GlusterAIOCB {
int64_t size;
@@ -28,27 +48,141 @@ typedef struct BDRVGlusterState {
int debug_level;
} BDRVGlusterState;
typedef struct GlusterConf {
char *server;
int port;
char *volname;
char *image;
char *transport;
int debug_level;
} GlusterConf;
typedef struct BDRVGlusterReopenState {
struct glfs *glfs;
struct glfs_fd *fd;
} BDRVGlusterReopenState;
static void qemu_gluster_gconf_free(GlusterConf *gconf)
{
if (gconf) {
g_free(gconf->server);
g_free(gconf->volname);
g_free(gconf->image);
g_free(gconf->transport);
g_free(gconf);
static QemuOptsList qemu_gluster_create_opts = {
.name = "qemu-gluster-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
.desc = {
{
.name = BLOCK_OPT_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_OPT_PREALLOC,
.type = QEMU_OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
}
}
};
static int parse_volume_options(GlusterConf *gconf, char *path)
static QemuOptsList runtime_opts = {
.name = "gluster",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = GLUSTER_OPT_FILENAME,
.type = QEMU_OPT_STRING,
.help = "URL to the gluster image",
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_json_opts = {
.name = "gluster_json",
.head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
.desc = {
{
.name = GLUSTER_OPT_VOLUME,
.type = QEMU_OPT_STRING,
.help = "name of gluster volume where VM image resides",
},
{
.name = GLUSTER_OPT_PATH,
.type = QEMU_OPT_STRING,
.help = "absolute path to image file in gluster volume",
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_type_opts = {
.name = "gluster_type",
.head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
.desc = {
{
.name = GLUSTER_OPT_TYPE,
.type = QEMU_OPT_STRING,
.help = "tcp|unix",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_unix_opts = {
.name = "gluster_unix",
.head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
.desc = {
{
.name = GLUSTER_OPT_SOCKET,
.type = QEMU_OPT_STRING,
.help = "socket file path)",
},
{ /* end of list */ }
},
};
static QemuOptsList runtime_tcp_opts = {
.name = "gluster_tcp",
.head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
.desc = {
{
.name = GLUSTER_OPT_TYPE,
.type = QEMU_OPT_STRING,
.help = "tcp|unix",
},
{
.name = GLUSTER_OPT_HOST,
.type = QEMU_OPT_STRING,
.help = "host address (hostname/ipv4/ipv6 addresses)",
},
{
.name = GLUSTER_OPT_PORT,
.type = QEMU_OPT_NUMBER,
.help = "port number on which glusterd is listening (default 24007)",
},
{
.name = "to",
.type = QEMU_OPT_NUMBER,
.help = "max port number, not supported by gluster",
},
{
.name = "ipv4",
.type = QEMU_OPT_BOOL,
.help = "ipv4 bool value, not supported by gluster",
},
{
.name = "ipv6",
.type = QEMU_OPT_BOOL,
.help = "ipv6 bool value, not supported by gluster",
},
{ /* end of list */ }
},
};
static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
{
char *p, *q;
@@ -62,31 +196,29 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
if (*p == '\0') {
return -EINVAL;
}
gconf->volname = g_strndup(q, p - q);
gconf->volume = g_strndup(q, p - q);
/* image */
/* path */
p += strspn(p, "/");
if (*p == '\0') {
return -EINVAL;
}
gconf->image = g_strdup(p);
gconf->path = g_strdup(p);
return 0;
}
/*
* file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
* file=gluster[+transport]://[host[:port]]/volume/path[?socket=...]
*
* 'gluster' is the protocol.
*
* 'transport' specifies the transport type used to connect to gluster
* management daemon (glusterd). Valid transport types are
* tcp, unix and rdma. If a transport type isn't specified, then tcp
* type is assumed.
* tcp or unix. If a transport type isn't specified, then tcp type is assumed.
*
* 'server' specifies the server where the volume file specification for
* the given volume resides. This can be either hostname, ipv4 address
* or ipv6 address. ipv6 address needs to be within square brackets [ ].
* If transport type is 'unix', then 'server' field should not be specified.
* 'host' specifies the host where the volume file specification for
* the given volume resides. This can be either hostname or ipv4 address.
* If transport type is 'unix', then 'host' field should not be specified.
* The 'socket' field needs to be populated with the path to unix domain
* socket.
*
@@ -95,23 +227,22 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
* default port. If the transport type is unix, then 'port' should not be
* specified.
*
* 'volname' is the name of the gluster volume which contains the VM image.
* 'volume' is the name of the gluster volume which contains the VM image.
*
* 'image' is the path to the actual VM image that resides on gluster volume.
* 'path' is the path to the actual VM image that resides on gluster volume.
*
* Examples:
*
* file=gluster://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
* file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
* file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
* file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
* file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
* file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
* file=gluster+rdma://1.2.3.4:24007/testvol/a.img
*/
static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
const char *filename)
{
GlusterServer *gsconf;
URI *uri;
QueryParams *qp = NULL;
bool is_unix = false;
@@ -122,16 +253,21 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
return -EINVAL;
}
gconf->server = g_new0(GlusterServerList, 1);
gconf->server->value = gsconf = g_new0(GlusterServer, 1);
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
gconf->transport = g_strdup("tcp");
gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
gconf->transport = g_strdup("tcp");
gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+unix")) {
gconf->transport = g_strdup("unix");
gsconf->type = GLUSTER_TRANSPORT_UNIX;
is_unix = true;
} else if (!strcmp(uri->scheme, "gluster+rdma")) {
gconf->transport = g_strdup("rdma");
gsconf->type = GLUSTER_TRANSPORT_TCP;
error_report("Warning: rdma feature is not supported, falling "
"back to tcp");
} else {
ret = -EINVAL;
goto out;
@@ -157,10 +293,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
ret = -EINVAL;
goto out;
}
gconf->server = g_strdup(qp->p[0].value);
gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
} else {
gconf->server = g_strdup(uri->server ? uri->server : "localhost");
gconf->port = uri->port;
gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
if (uri->port) {
gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
} else {
gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
}
}
out:
@@ -171,30 +311,34 @@ out:
return ret;
}
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
Error **errp)
static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
Error **errp)
{
struct glfs *glfs = NULL;
struct glfs *glfs;
int ret;
int old_errno;
GlusterServerList *server;
ret = qemu_gluster_parseuri(gconf, filename);
if (ret < 0) {
error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
"volname/image[?socket=...]");
errno = -ret;
goto out;
}
glfs = glfs_new(gconf->volname);
glfs = glfs_new(gconf->volume);
if (!glfs) {
goto out;
}
ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
gconf->port);
if (ret < 0) {
goto out;
for (server = gconf->server; server; server = server->next) {
if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
ret = glfs_set_volfile_server(glfs,
GlusterTransport_lookup[server->value->type],
server->value->u.q_unix.path, 0);
} else {
ret = glfs_set_volfile_server(glfs,
GlusterTransport_lookup[server->value->type],
server->value->u.tcp.host,
atoi(server->value->u.tcp.port));
}
if (ret < 0) {
goto out;
}
}
ret = glfs_set_logging(glfs, "-", gconf->debug_level);
@@ -204,15 +348,25 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
ret = glfs_init(glfs);
if (ret) {
error_setg_errno(errp, errno,
"Gluster connection failed for server=%s port=%d "
"volume=%s image=%s transport=%s", gconf->server,
gconf->port, gconf->volname, gconf->image,
gconf->transport);
error_setg(errp, "Gluster connection for volume %s, path %s failed"
" to connect", gconf->volume, gconf->path);
for (server = gconf->server; server; server = server->next) {
if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
error_append_hint(errp, "hint: failed on socket %s ",
server->value->u.q_unix.path);
} else {
error_append_hint(errp, "hint: failed on host %s and port %s ",
server->value->u.tcp.host,
server->value->u.tcp.port);
}
}
error_append_hint(errp, "Please refer to gluster logs for more info\n");
/* glfs_init sometimes doesn't set errno although docs suggest that */
if (errno == 0)
if (errno == 0) {
errno = EINVAL;
}
goto out;
}
@@ -227,13 +381,233 @@ out:
return NULL;
}
static int qapi_enum_parse(const char *opt)
{
int i;
if (!opt) {
return GLUSTER_TRANSPORT__MAX;
}
for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
if (!strcmp(opt, GlusterTransport_lookup[i])) {
return i;
}
}
return i;
}
/*
* Convert the json formatted command line into qapi.
*/
static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
QDict *options, Error **errp)
{
QemuOpts *opts;
GlusterServer *gsconf;
GlusterServerList *curr = NULL;
QDict *backing_options = NULL;
Error *local_err = NULL;
char *str = NULL;
const char *ptr;
size_t num_servers;
int i;
/* create opts info from runtime_json_opts list */
opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
goto out;
}
num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
if (num_servers < 1) {
error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
goto out;
}
gconf->volume = g_strdup(ptr);
ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
goto out;
}
gconf->path = g_strdup(ptr);
qemu_opts_del(opts);
for (i = 0; i < num_servers; i++) {
str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
qdict_extract_subqdict(options, &backing_options, str);
/* create opts info from runtime_type_opts list */
opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
gsconf = g_new0(GlusterServer, 1);
gsconf->type = qapi_enum_parse(ptr);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
GLUSTER_OPT_TYPE, "tcp or unix");
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
qemu_opts_del(opts);
if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
/* create opts info from runtime_tcp_opts list */
opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_HOST);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.tcp.host = g_strdup(ptr);
ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_PORT);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.tcp.port = g_strdup(ptr);
/* defend for unsupported fields in InetSocketAddress,
* i.e. @ipv4, @ipv6 and @to
*/
ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
if (ptr) {
gsconf->u.tcp.has_to = true;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
if (ptr) {
gsconf->u.tcp.has_ipv4 = true;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
if (ptr) {
gsconf->u.tcp.has_ipv6 = true;
}
if (gsconf->u.tcp.has_to) {
error_setg(&local_err, "Parameter 'to' not supported");
goto out;
}
if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
goto out;
}
qemu_opts_del(opts);
} else {
/* create opts info from runtime_unix_opts list */
opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, backing_options, &local_err);
if (local_err) {
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_SOCKET);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
gsconf->u.q_unix.path = g_strdup(ptr);
qemu_opts_del(opts);
}
if (gconf->server == NULL) {
gconf->server = g_new0(GlusterServerList, 1);
gconf->server->value = gsconf;
curr = gconf->server;
} else {
curr->next = g_new0(GlusterServerList, 1);
curr->next->value = gsconf;
curr = curr->next;
}
qdict_del(backing_options, str);
g_free(str);
str = NULL;
}
return 0;
out:
error_propagate(errp, local_err);
qemu_opts_del(opts);
if (str) {
qdict_del(backing_options, str);
g_free(str);
}
errno = EINVAL;
return -errno;
}
static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
const char *filename,
QDict *options, Error **errp)
{
int ret;
if (filename) {
ret = qemu_gluster_parse_uri(gconf, filename);
if (ret < 0) {
error_setg(errp, "invalid URI");
error_append_hint(errp, "Usage: file=gluster[+transport]://"
"[host[:port]]/volume/path[?socket=...]\n");
errno = -ret;
return NULL;
}
} else {
ret = qemu_gluster_parse_json(gconf, options, errp);
if (ret < 0) {
error_append_hint(errp, "Usage: "
"-drive driver=qcow2,file.driver=gluster,"
"file.volume=testvol,file.path=/path/a.qcow2"
"[,file.debug=9],file.server.0.type=tcp,"
"file.server.0.host=1.2.3.4,"
"file.server.0.port=24007,"
"file.server.1.transport=unix,"
"file.server.1.socket=/var/run/glusterd.socket ..."
"\n");
errno = -ret;
return NULL;
}
}
return qemu_gluster_glfs_init(gconf, errp);
}
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_bh_delete(acb->bh);
acb->bh = NULL;
qemu_coroutine_enter(acb->coroutine, NULL);
qemu_coroutine_enter(acb->coroutine);
}
/*
@@ -255,30 +629,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
qemu_bh_schedule(acb->bh);
}
#define GLUSTER_OPT_FILENAME "filename"
#define GLUSTER_OPT_DEBUG "debug"
#define GLUSTER_DEBUG_DEFAULT 4
#define GLUSTER_DEBUG_MAX 9
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "gluster",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = GLUSTER_OPT_FILENAME,
.type = QEMU_OPT_STRING,
.help = "URL to the gluster image",
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
},
};
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -324,7 +674,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
BDRVGlusterState *s = bs->opaque;
int open_flags = 0;
int ret = 0;
GlusterConf *gconf = g_new0(GlusterConf, 1);
BlockdevOptionsGluster *gconf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -347,8 +697,10 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
s->debug_level = GLUSTER_DEBUG_MAX;
}
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug_level = s->debug_level;
s->glfs = qemu_gluster_init(gconf, filename, errp);
gconf->has_debug_level = true;
s->glfs = qemu_gluster_init(gconf, filename, options, errp);
if (!s->glfs) {
ret = -errno;
goto out;
@@ -373,7 +725,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
s->fd = glfs_open(s->glfs, gconf->image, open_flags);
s->fd = glfs_open(s->glfs, gconf->path, open_flags);
if (!s->fd) {
ret = -errno;
}
@@ -382,7 +734,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
out:
qemu_opts_del(opts);
qemu_gluster_gconf_free(gconf);
qapi_free_BlockdevOptionsGluster(gconf);
if (!ret) {
return ret;
}
@@ -395,19 +747,13 @@ out:
return ret;
}
typedef struct BDRVGlusterReopenState {
struct glfs *glfs;
struct glfs_fd *fd;
} BDRVGlusterReopenState;
static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
int ret = 0;
BDRVGlusterState *s;
BDRVGlusterReopenState *reop_s;
GlusterConf *gconf = NULL;
BlockdevOptionsGluster *gconf;
int open_flags = 0;
assert(state != NULL);
@@ -420,10 +766,10 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
qemu_gluster_parse_flags(state->flags, &open_flags);
gconf = g_new0(GlusterConf, 1);
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug_level = s->debug_level;
reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
gconf->has_debug_level = true;
reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
if (reop_s->glfs == NULL) {
ret = -errno;
goto exit;
@@ -439,7 +785,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
}
#endif
reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
ret = -errno;
@@ -448,7 +794,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
exit:
/* state->opaque will be freed in either the _abort or _commit */
qemu_gluster_gconf_free(gconf);
qapi_free_BlockdevOptionsGluster(gconf);
return ret;
}
@@ -501,7 +847,9 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
#ifdef CONFIG_GLUSTERFS_ZEROFILL
static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int size, BdrvRequestFlags flags)
int64_t offset,
int size,
BdrvRequestFlags flags)
{
int ret;
GlusterAIOCB acb;
@@ -527,7 +875,7 @@ static inline bool gluster_supports_zerofill(void)
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
@@ -539,7 +887,7 @@ static inline bool gluster_supports_zerofill(void)
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
int64_t size)
{
return 0;
}
@@ -548,14 +896,15 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
static int qemu_gluster_create(const char *filename,
QemuOpts *opts, Error **errp)
{
BlockdevOptionsGluster *gconf;
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
char *tmp = NULL;
GlusterConf *gconf = g_new0(GlusterConf, 1);
gconf = g_new0(BlockdevOptionsGluster, 1);
gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
GLUSTER_DEBUG_DEFAULT);
if (gconf->debug_level < 0) {
@@ -563,8 +912,9 @@ static int qemu_gluster_create(const char *filename,
} else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
gconf->debug_level = GLUSTER_DEBUG_MAX;
}
gconf->has_debug_level = true;
glfs = qemu_gluster_init(gconf, filename, errp);
glfs = qemu_gluster_init(gconf, filename, NULL, errp);
if (!glfs) {
ret = -errno;
goto out;
@@ -576,19 +926,17 @@ static int qemu_gluster_create(const char *filename,
tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
if (!tmp || !strcmp(tmp, "off")) {
prealloc = 0;
} else if (!strcmp(tmp, "full") &&
gluster_supports_zerofill()) {
} else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
" or GlusterFS doesn't support zerofill API",
tmp);
" or GlusterFS doesn't support zerofill API", tmp);
ret = -EINVAL;
goto out;
}
fd = glfs_creat(glfs, gconf->image,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
fd = glfs_creat(glfs, gconf->path,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
if (!fd) {
ret = -errno;
} else {
@@ -606,7 +954,7 @@ static int qemu_gluster_create(const char *filename,
}
out:
g_free(tmp);
qemu_gluster_gconf_free(gconf);
qapi_free_BlockdevOptionsGluster(gconf);
if (glfs) {
glfs_fini(glfs);
}
@@ -614,7 +962,8 @@ out:
}
static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov, int write)
{
int ret;
GlusterAIOCB acb;
@@ -629,10 +978,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
gluster_finish_aiocb, &acb);
} else {
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
gluster_finish_aiocb, &acb);
gluster_finish_aiocb, &acb);
}
if (ret < 0) {
@@ -657,13 +1006,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
int64_t sector_num,
int nb_sectors,
QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
}
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
int64_t sector_num,
int nb_sectors,
QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
@@ -724,14 +1077,12 @@ error:
}
#ifdef CONFIG_GLUSTERFS_DISCARD
static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
int64_t offset, int size)
{
int ret;
GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb.size = 0;
acb.ret = 0;
@@ -934,34 +1285,11 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status(
}
static QemuOptsList qemu_gluster_create_opts = {
.name = "qemu-gluster-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
.desc = {
{
.name = BLOCK_OPT_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_OPT_PREALLOC,
.type = QEMU_OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
{
.name = GLUSTER_OPT_DEBUG,
.type = QEMU_OPT_NUMBER,
.help = "Gluster log level, valid range is 0-9",
},
{ /* end of list */ }
}
};
static BlockDriver bdrv_gluster = {
.format_name = "gluster",
.protocol_name = "gluster",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -976,7 +1304,7 @@ static BlockDriver bdrv_gluster = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
@@ -989,7 +1317,7 @@ static BlockDriver bdrv_gluster_tcp = {
.format_name = "gluster",
.protocol_name = "gluster+tcp",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -1004,7 +1332,7 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
@@ -1032,7 +1360,7 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
@@ -1041,6 +1369,12 @@ static BlockDriver bdrv_gluster_unix = {
.create_opts = &qemu_gluster_create_opts,
};
/* rdma is deprecated (actually never supported for volfile fetch).
* Let's maintain it for the protocol compatibility, to make sure things
* won't break immediately. For now, gluster+rdma will fall back to gluster+tcp
* protocol with a warning.
* TODO: remove gluster+rdma interface support
*/
static BlockDriver bdrv_gluster_rdma = {
.format_name = "gluster",
.protocol_name = "gluster+rdma",
@@ -1060,7 +1394,7 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
.bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
* Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
* Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -46,7 +46,6 @@
#ifdef __linux__
#include <scsi/sg.h>
#include <block/scsi.h>
#endif
typedef struct IscsiLun {
@@ -62,7 +61,23 @@ typedef struct IscsiLun {
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
unsigned long *allocationmap;
/* The allocmap tracks which clusters (pages) on the iSCSI target are
* allocated and which are not. In case a target returns zeros for
* unallocated pages (iscsilun->lprz) we can directly return zeros instead
* of reading zeros over the wire if a read request falls within an
* unallocated block. As there are 3 possible states we need 2 bitmaps to
* track. allocmap_valid keeps track if QEMU's information about a page is
* valid. allocmap tracks if a page is allocated or not. In case QEMU has no
* valid information about a page the corresponding allocmap entry should be
* switched to unallocated as well to force a new lookup of the allocation
* status as lookups are generally skipped if a page is suspect to be
* allocated. If a iSCSI target is opened with cache.direct = on the
* allocmap_valid does not exist turning all cached information invalid so
* that a fresh lookup is made for any page even if allocmap entry returns
* it's unallocated. */
unsigned long *allocmap;
unsigned long *allocmap_valid;
long allocmap_size;
int cluster_sectors;
bool use_16_for_rw;
bool write_protected;
@@ -153,7 +168,7 @@ static void iscsi_co_generic_bh_cb(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_bh_delete(iTask->bh);
qemu_coroutine_enter(iTask->co, NULL);
qemu_coroutine_enter(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -161,7 +176,7 @@ static void iscsi_retry_timer_expired(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
qemu_coroutine_enter(iTask->co, NULL);
qemu_coroutine_enter(iTask->co);
}
}
@@ -423,37 +438,135 @@ static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
iscsilun);
}
static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
static void iscsi_allocmap_free(IscsiLun *iscsilun)
{
return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
iscsilun),
iscsilun->cluster_sectors));
g_free(iscsilun->allocmap);
g_free(iscsilun->allocmap_valid);
iscsilun->allocmap = NULL;
iscsilun->allocmap_valid = NULL;
}
static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors)
static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
{
if (iscsilun->allocationmap == NULL) {
return;
iscsi_allocmap_free(iscsilun);
iscsilun->allocmap_size =
DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
iscsilun->cluster_sectors);
iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
if (!iscsilun->allocmap) {
return -ENOMEM;
}
bitmap_set(iscsilun->allocationmap,
sector_num / iscsilun->cluster_sectors,
DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
if (open_flags & BDRV_O_NOCACHE) {
/* in case that cache.direct = on all allocmap entries are
* treated as invalid to force a relookup of the block
* status on every read request */
return 0;
}
iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
if (!iscsilun->allocmap_valid) {
/* if we are under memory pressure free the allocmap as well */
iscsi_allocmap_free(iscsilun);
return -ENOMEM;
}
return 0;
}
static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors)
static void
iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors, bool allocated, bool valid)
{
int64_t cluster_num, nb_clusters;
if (iscsilun->allocationmap == NULL) {
int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
if (iscsilun->allocmap == NULL) {
return;
}
cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
- cluster_num;
if (nb_clusters > 0) {
bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
/* expand to entirely contain all affected clusters */
cl_num_expanded = sector_num / iscsilun->cluster_sectors;
nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
iscsilun->cluster_sectors) - cl_num_expanded;
/* shrink to touch only completely contained clusters */
cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
- cl_num_shrunk;
if (allocated) {
bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
} else {
bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
}
if (iscsilun->allocmap_valid == NULL) {
return;
}
if (valid) {
bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
} else {
bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
nb_cls_expanded);
}
}
static void
iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors)
{
iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
}
static void
iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors)
{
/* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
* is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
*/
iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
}
static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors)
{
iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
}
static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
{
if (iscsilun->allocmap) {
bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
}
if (iscsilun->allocmap_valid) {
bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
}
}
static inline bool
iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
int nb_sectors)
{
unsigned long size;
if (iscsilun->allocmap == NULL) {
return true;
}
size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
return !(find_next_bit(iscsilun->allocmap, size,
sector_num / iscsilun->cluster_sectors) == size);
}
static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
int64_t sector_num, int nb_sectors)
{
unsigned long size;
if (iscsilun->allocmap_valid == NULL) {
return false;
}
size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
return (find_next_zero_bit(iscsilun->allocmap_valid, size,
sector_num / iscsilun->cluster_sectors) == size);
}
static int coroutine_fn
@@ -473,10 +586,8 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
return -EINVAL;
}
if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
"of %d sectors", nb_sectors, bs->bl.max_transfer_length);
return -EINVAL;
if (bs->bl.max_transfer) {
assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
lba = sector_qemu2lun(sector_num, iscsilun);
@@ -515,26 +626,16 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
return iTask.err_code;
}
iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
return 0;
}
static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
int64_t sector_num, int nb_sectors)
{
unsigned long size;
if (iscsilun->allocationmap == NULL) {
return true;
}
size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
return !(find_next_bit(iscsilun->allocationmap, size,
sector_num / iscsilun->cluster_sectors) == size);
}
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
@@ -619,9 +720,9 @@ retry:
}
if (ret & BDRV_BLOCK_ZERO) {
iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
} else {
iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
}
if (*pnum > nb_sectors) {
@@ -650,23 +751,36 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
return -EINVAL;
}
if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
"of %d sectors", nb_sectors, bs->bl.max_transfer_length);
return -EINVAL;
if (bs->bl.max_transfer) {
assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
!iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int64_t ret;
/* if cache.direct is off and we have a valid entry in our allocation map
* we can skip checking the block status and directly return zeroes if
* the request falls within an unallocated area */
if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
!iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
qemu_iovec_memset(iov, 0, 0x00, iov->size);
return 0;
}
if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
!iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
!iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int pnum;
BlockDriverState *file;
ret = iscsi_co_get_block_status(bs, sector_num,
BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
/* check the block status from the beginning of the cluster
* containing the start sector */
int64_t ret = iscsi_co_get_block_status(bs,
sector_num - sector_num % iscsilun->cluster_sectors,
BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
if (ret < 0) {
return ret;
}
if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
/* if the whole request falls into an unallocated area we can avoid
* to read and directly return zeroes instead */
if (ret & BDRV_BLOCK_ZERO &&
pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
qemu_iovec_memset(iov, 0, 0x00, iov->size);
return 0;
}
@@ -928,29 +1042,26 @@ iscsi_getlength(BlockDriverState *bs)
}
static int
coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
assert(is_byte_request_lun_aligned(offset, count, iscsilun));
if (!iscsilun->lbp.lbpu) {
/* UNMAP is not supported by the target */
return 0;
}
list.lba = sector_qemu2lun(sector_num, iscsilun);
list.num = sector_qemu2lun(nb_sectors, iscsilun);
list.lba = offset / iscsilun->block_size;
list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
iscsi_co_generic_cb, &iTask) == NULL) {
iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
}
@@ -980,7 +1091,8 @@ retry:
return iTask.err_code;
}
iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return 0;
}
@@ -1070,15 +1182,17 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return iTask.err_code;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
iscsi_allocationmap_clear(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
} else {
iscsi_allocationmap_set(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
}
return 0;
@@ -1589,14 +1703,13 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
bs->request_alignment = iscsilun->block_size;
/* We don't have any emulation for devices other than disks and CD-ROMs, so
* this must be sg ioctl compatible. We force it to be sg, otherwise qemu
* will try to read from the device to guess the image format.
*/
if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
bs->sg = 1;
bs->sg = true;
}
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1652,10 +1765,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
iscsilun->block_size) >> BDRV_SECTOR_BITS;
if (iscsilun->lbprz) {
iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
if (iscsilun->allocationmap == NULL) {
ret = -ENOMEM;
}
ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
}
}
@@ -1692,38 +1802,37 @@ static void iscsi_close(BlockDriverState *bs)
}
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
g_free(iscsilun->allocationmap);
iscsi_allocmap_free(iscsilun);
memset(iscsilun, 0, sizeof(IscsiLun));
}
static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
{
return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
}
static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* We don't actually refresh here, but just return data queried in
* iscsi_open(): iscsi targets don't change their limits. */
IscsiLun *iscsilun = bs->opaque;
uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
bs->bl.request_alignment = iscsilun->block_size;
if (iscsilun->bl.max_xfer_len) {
max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
}
bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
if (max_xfer_len * iscsilun->block_size < INT_MAX) {
bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
}
if (iscsilun->lbp.lbpu) {
if (iscsilun->bl.max_unmap < 0xffffffff) {
bs->bl.max_discard =
sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) {
bs->bl.max_pdiscard =
iscsilun->bl.max_unmap * iscsilun->block_size;
}
bs->bl.discard_alignment =
sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
bs->bl.pdiscard_alignment =
iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
} else {
bs->bl.discard_alignment = iscsilun->block_size >> BDRV_SECTOR_BITS;
bs->bl.pdiscard_alignment = iscsilun->block_size;
}
if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
@@ -1736,8 +1845,11 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
} else {
bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
}
bs->bl.opt_transfer_length =
sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
if (iscsilun->bl.opt_xfer_len &&
iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) {
bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
iscsilun->block_size);
}
}
/* Note that this will not re-establish a connection with an iSCSI target - it
@@ -1754,6 +1866,16 @@ static int iscsi_reopen_prepare(BDRVReopenState *state,
return 0;
}
static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
{
IscsiLun *iscsilun = reopen_state->bs->opaque;
/* the cache.direct status might have changed */
if (iscsilun->allocmap != NULL) {
iscsi_allocmap_init(iscsilun, reopen_state->flags);
}
}
static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
{
IscsiLun *iscsilun = bs->opaque;
@@ -1773,9 +1895,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL;
}
if (iscsilun->allocationmap != NULL) {
g_free(iscsilun->allocationmap);
iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
if (iscsilun->allocmap != NULL) {
iscsi_allocmap_init(iscsilun, bs->open_flags);
}
return 0;
@@ -1835,6 +1956,13 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
static void iscsi_invalidate_cache(BlockDriverState *bs,
Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
iscsi_allocmap_invalidate(iscsilun);
}
static QemuOptsList iscsi_create_opts = {
.name = "iscsi-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -1858,7 +1986,9 @@ static BlockDriver bdrv_iscsi = {
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -1866,7 +1996,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_refresh_limits = iscsi_refresh_limits,
.bdrv_co_get_block_status = iscsi_co_get_block_status,
.bdrv_co_discard = iscsi_co_discard,
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev_flags = iscsi_co_writev_flags,

View File

@@ -28,8 +28,6 @@
*/
#define MAX_EVENTS 128
#define MAX_QUEUED_IO 128
struct qemu_laiocb {
BlockAIOCB common;
Coroutine *co;
@@ -44,12 +42,15 @@ struct qemu_laiocb {
typedef struct {
int plugged;
unsigned int n;
unsigned int in_queue;
unsigned int in_flight;
bool blocked;
QSIMPLEQ_HEAD(, qemu_laiocb) pending;
} LaioQueue;
struct LinuxAioState {
AioContext *aio_context;
io_context_t ctx;
EventNotifier e;
@@ -87,14 +88,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
qemu_iovec_memset(laiocb->qiov, ret, 0,
laiocb->qiov->size - ret);
} else {
ret = -EINVAL;
ret = -ENOSPC;
}
}
}
laiocb->ret = ret;
if (laiocb->co) {
qemu_coroutine_enter(laiocb->co, NULL);
qemu_coroutine_enter(laiocb->co);
} else {
laiocb->common.cb(laiocb->common.opaque, ret);
qemu_aio_unref(laiocb);
@@ -129,6 +130,7 @@ static void qemu_laio_completion_bh(void *opaque)
s->event_max = 0;
return; /* no more events */
}
s->io_q.in_flight -= s->event_max;
}
/* Reschedule so nested event loops see currently pending completions */
@@ -190,7 +192,8 @@ static void ioq_init(LaioQueue *io_q)
{
QSIMPLEQ_INIT(&io_q->pending);
io_q->plugged = 0;
io_q->n = 0;
io_q->in_queue = 0;
io_q->in_flight = 0;
io_q->blocked = false;
}
@@ -198,14 +201,17 @@ static void ioq_submit(LinuxAioState *s)
{
int ret, len;
struct qemu_laiocb *aiocb;
struct iocb *iocbs[MAX_QUEUED_IO];
struct iocb *iocbs[MAX_EVENTS];
QSIMPLEQ_HEAD(, qemu_laiocb) completed;
do {
if (s->io_q.in_flight >= MAX_EVENTS) {
break;
}
len = 0;
QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
iocbs[len++] = &aiocb->iocb;
if (len == MAX_QUEUED_IO) {
if (s->io_q.in_flight + len >= MAX_EVENTS) {
break;
}
}
@@ -215,27 +221,33 @@ static void ioq_submit(LinuxAioState *s)
break;
}
if (ret < 0) {
abort();
/* Fail the first request, retry the rest */
aiocb = QSIMPLEQ_FIRST(&s->io_q.pending);
QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
s->io_q.in_queue--;
aiocb->ret = ret;
qemu_laio_process_completion(aiocb);
continue;
}
s->io_q.n -= ret;
s->io_q.in_flight += ret;
s->io_q.in_queue -= ret;
aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
} while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
s->io_q.blocked = (s->io_q.n > 0);
s->io_q.blocked = (s->io_q.in_queue > 0);
}
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
{
assert(!s->io_q.plugged);
s->io_q.plugged = 1;
s->io_q.plugged++;
}
void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
{
assert(s->io_q.plugged);
s->io_q.plugged = 0;
if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
if (--s->io_q.plugged == 0 &&
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
}
@@ -263,9 +275,10 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
s->io_q.n++;
s->io_q.in_queue++;
if (!s->io_q.blocked &&
(!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
(!s->io_q.plugged ||
s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
ioq_submit(s);
}
@@ -325,6 +338,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
s->aio_context = new_context;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e, false,
qemu_laio_completion_cb);

View File

@@ -23,7 +23,9 @@
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
#define DEFAULT_MIRROR_BUF_SIZE \
(MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
/* The mirroring buffer is a list of granularity-sized chunks.
* Free chunks are organized in a list.
@@ -58,9 +60,10 @@ typedef struct MirrorBlockJob {
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
int buf_free_count;
uint64_t last_pause_ns;
unsigned long *in_flight_bitmap;
int in_flight;
int sectors_in_flight;
int64_t sectors_in_flight;
int ret;
bool unmap;
bool waiting_for_io;
@@ -121,7 +124,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
g_free(op);
if (s->waiting_for_io) {
qemu_coroutine_enter(s->common.co, NULL);
qemu_coroutine_enter(s->common.co);
}
}
@@ -303,8 +306,9 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
s->in_flight++;
s->sectors_in_flight += nb_sectors;
if (is_discard) {
blk_aio_discard(s->target, sector_num, op->nb_sectors,
mirror_write_complete, op);
blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
op->nb_sectors << BDRV_SECTOR_BITS,
mirror_write_complete, op);
} else {
blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
op->nb_sectors * BDRV_SECTOR_SIZE,
@@ -322,6 +326,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int nb_chunks = 1;
int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
MAX_IO_SECTORS);
sector_num = hbitmap_iter_next(&s->hbi);
if (sector_num < 0) {
@@ -372,7 +379,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
int io_sectors;
int io_sectors, io_sectors_acct;
BlockDriverState *file;
enum MirrorMethod {
MIRROR_METHOD_COPY,
@@ -385,7 +392,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * sectors_per_chunk,
&io_sectors, &file);
if (ret < 0) {
io_sectors = nb_chunks * sectors_per_chunk;
io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
} else if (ret & BDRV_BLOCK_DATA) {
io_sectors = MIN(io_sectors, max_io_sectors);
}
io_sectors -= io_sectors % sectors_per_chunk;
@@ -405,16 +414,30 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
}
while (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
mirror_wait_for_io(s);
}
if (s->ret < 0) {
return 0;
}
mirror_clip_sectors(s, sector_num, &io_sectors);
switch (mirror_method) {
case MIRROR_METHOD_COPY:
io_sectors = mirror_do_read(s, sector_num, io_sectors);
io_sectors_acct = io_sectors;
break;
case MIRROR_METHOD_ZERO:
mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
break;
case MIRROR_METHOD_DISCARD:
mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
mirror_do_zero_or_discard(s, sector_num, io_sectors,
mirror_method == MIRROR_METHOD_DISCARD);
if (write_zeroes_ok) {
io_sectors_acct = 0;
} else {
io_sectors_acct = io_sectors;
}
break;
default:
abort();
@@ -422,7 +445,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
assert(io_sectors);
sector_num += io_sectors;
nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
}
}
return delay_ns;
}
@@ -506,25 +531,97 @@ static void mirror_exit(BlockJob *job, void *opaque)
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
aio_enable_external(iohandler_get_aio_context());
}
bdrv_unref(src);
}
static void mirror_throttle(MirrorBlockJob *s)
{
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (now - s->last_pause_ns > SLICE_TIME) {
s->last_pause_ns = now;
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
} else {
block_job_pause_point(&s->common);
}
}
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
end = s->bdev_length / BDRV_SECTOR_SIZE;
if (base == NULL && !bdrv_has_zero_init(target_bs)) {
if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
return 0;
}
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
return 0;
}
if (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
mirror_wait_for_io(s);
continue;
}
mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
sector_num += nb_sectors;
}
mirror_drain(s);
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
end - sector_num);
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
return 0;
}
ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
if (ret < 0) {
return ret;
}
assert(n > 0);
if (ret == 1) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
sector_num += n;
}
return 0;
}
static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
BlockDriverState *bs = blk_bs(s->common.blk);
BlockDriverState *target_bs = blk_bs(s->target);
int64_t sector_num, end, length;
uint64_t last_pause_ns;
int64_t length;
BlockDriverInfo bdi;
char backing_filename[2]; /* we only need 2 characters because we are only
checking for a NULL string */
int ret = 0;
int n;
int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
@@ -566,7 +663,6 @@ static void coroutine_fn mirror_run(void *opaque)
s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
if (s->buf == NULL) {
ret = -ENOMEM;
@@ -575,47 +671,18 @@ static void coroutine_fn mirror_run(void *opaque)
mirror_free_init(s);
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (!s->is_none_mode) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base = s->base;
bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs);
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
end - sector_num);
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (now - last_pause_ns > SLICE_TIME) {
last_pause_ns = now;
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
} else {
block_job_pause_point(&s->common);
}
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
}
ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
if (ret < 0) {
goto immediate_exit;
}
assert(n > 0);
if (ret == 1 || mark_all_dirty) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
sector_num += n;
ret = mirror_dirty_init(s);
if (ret < 0 || block_job_is_cancelled(&s->common)) {
goto immediate_exit;
}
}
bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
for (;;) {
uint64_t delay_ns = 0;
int64_t cnt;
int64_t cnt, delta;
bool should_complete;
if (s->ret < 0) {
@@ -638,9 +705,10 @@ static void coroutine_fn mirror_run(void *opaque)
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
if (delta < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
mirror_wait_for_io(s);
@@ -708,7 +776,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
immediate_exit:
@@ -732,12 +800,6 @@ immediate_exit:
/* Before we switch to target in mirror_exit, make sure data doesn't
* change. */
bdrv_drained_begin(bs);
if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
/* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
* above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
* need a block layer API change to achieve this. */
aio_disable_external(iohandler_get_aio_context());
}
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
@@ -761,7 +823,8 @@ static void mirror_complete(BlockJob *job, Error **errp)
target = blk_bs(s->target);
if (!s->synced) {
error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
error_setg(errp, "The active block job '%s' cannot be completed",
job->id);
return;
}
@@ -842,8 +905,8 @@ static const BlockJobDriver commit_active_job_driver = {
.attached_aio_context = mirror_attached_aio_context,
};
static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
const char *replaces,
static void mirror_start_job(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity,
int64_t buf_size,
BlockMirrorBackingMode backing_mode,
@@ -872,7 +935,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
s = block_job_create(driver, bs, speed, cb, opaque, errp);
s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp);
if (!s) {
return;
}
@@ -900,13 +963,13 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
bdrv_op_block_all(target, s->common.blocker);
s->common.co = qemu_coroutine_create(mirror_run);
s->common.co = qemu_coroutine_create(mirror_run, s);
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
qemu_coroutine_enter(s->common.co);
}
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
const char *replaces,
void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
@@ -924,14 +987,14 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
}
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
mirror_start_job(bs, target, replaces,
mirror_start_job(job_id, bs, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
int64_t speed,
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int64_t speed,
BlockdevOnError on_error,
BlockCompletionFunc *cb,
void *opaque, Error **errp)
@@ -972,7 +1035,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
}
}
mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN,
mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, false, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (local_err) {

View File

@@ -38,7 +38,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
}
@@ -99,7 +99,7 @@ static void nbd_reply_ready(void *opaque)
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
@@ -111,12 +111,12 @@ static void nbd_restart_write(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
}
static int nbd_co_send_request(BlockDriverState *bs,
struct nbd_request *request,
QEMUIOVector *qiov, int offset)
QEMUIOVector *qiov)
{
NbdClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
@@ -149,8 +149,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
offset, request->len, 0);
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
false);
if (ret != request->len) {
rc = -EIO;
}
@@ -167,8 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
}
static void nbd_co_receive_reply(NbdClientSession *s,
struct nbd_request *request, struct nbd_reply *reply,
QEMUIOVector *qiov, int offset)
struct nbd_request *request,
struct nbd_reply *reply,
QEMUIOVector *qiov)
{
int ret;
@@ -181,8 +182,8 @@ static void nbd_co_receive_reply(NbdClientSession *s,
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
offset, request->len, 1);
ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
true);
if (ret != request->len) {
reply->error = EIO;
}
@@ -217,36 +218,41 @@ static void nbd_coroutine_end(NbdClientSession *s,
}
}
static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_READ };
struct nbd_request request = {
.type = NBD_CMD_READ,
.from = offset,
.len = bytes,
};
struct nbd_reply reply;
ssize_t ret;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
assert(!flags);
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL, 0);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, qiov, offset);
nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset, int flags)
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_request request = {
.type = NBD_CMD_WRITE,
.from = offset,
.len = bytes,
};
struct nbd_reply reply;
ssize_t ret;
@@ -255,59 +261,19 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, qiov, offset);
ret = nbd_co_send_request(bs, &request, qiov);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
/* qemu-nbd has a limit of slightly less than 1M per request. Try to
* remain aligned to 4K. */
#define NBD_MAX_SECTORS 2040
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
}
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov, int flags)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
flags);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
}
int nbd_client_co_flush(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
@@ -323,36 +289,37 @@ int nbd_client_co_flush(BlockDriverState *bs)
request.len = 0;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL, 0);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_TRIM };
struct nbd_request request = {
.type = NBD_CMD_TRIM,
.from = offset,
.len = count,
};
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL, 0);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;

View File

@@ -20,7 +20,7 @@
typedef struct NbdClientSession {
QIOChannelSocket *sioc; /* The master data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint32_t nbdflags;
uint16_t nbdflags;
off_t size;
CoMutex send_mutex;
@@ -44,13 +44,12 @@ int nbd_client_init(BlockDriverState *bs,
Error **errp);
void nbd_client_close(BlockDriverState *bs);
int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov, int flags);
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
void nbd_client_detach_aio_context(BlockDriverState *bs);
void nbd_client_attach_aio_context(BlockDriverState *bs,

View File

@@ -42,6 +42,9 @@
typedef struct BDRVNBDState {
NbdClientSession client;
/* For nbd_refresh_filename() */
char *path, *host, *port, *export, *tlscredsid;
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -188,13 +191,15 @@ out:
g_free(file);
}
static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
Error **errp)
static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
{
SocketAddress *saddr;
if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
if (qdict_haskey(options, "path")) {
s->path = g_strdup(qemu_opt_get(opts, "path"));
s->host = g_strdup(qemu_opt_get(opts, "host"));
if (!s->path == !s->host) {
if (s->path) {
error_setg(errp, "path and host may not be used at the same time.");
} else {
error_setg(errp, "one of path and host must be specified.");
@@ -204,32 +209,28 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
saddr = g_new0(SocketAddress, 1);
if (qdict_haskey(options, "path")) {
if (s->path) {
UnixSocketAddress *q_unix;
saddr->type = SOCKET_ADDRESS_KIND_UNIX;
q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
q_unix->path = g_strdup(qdict_get_str(options, "path"));
qdict_del(options, "path");
q_unix->path = g_strdup(s->path);
} else {
InetSocketAddress *inet;
s->port = g_strdup(qemu_opt_get(opts, "port"));
saddr->type = SOCKET_ADDRESS_KIND_INET;
inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
inet->host = g_strdup(qdict_get_str(options, "host"));
if (!qdict_get_try_str(options, "port")) {
inet->host = g_strdup(s->host);
inet->port = g_strdup(s->port);
if (!inet->port) {
inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
} else {
inet->port = g_strdup(qdict_get_str(options, "port"));
}
qdict_del(options, "host");
qdict_del(options, "port");
}
s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {
qdict_del(options, "export");
}
s->export = g_strdup(qemu_opt_get(opts, "export"));
return saddr;
}
@@ -292,28 +293,66 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
}
static QemuOptsList nbd_runtime_opts = {
.name = "nbd",
.head = QTAILQ_HEAD_INITIALIZER(nbd_runtime_opts.head),
.desc = {
{
.name = "host",
.type = QEMU_OPT_STRING,
.help = "TCP host to connect to",
},
{
.name = "port",
.type = QEMU_OPT_STRING,
.help = "TCP port to connect to",
},
{
.name = "path",
.type = QEMU_OPT_STRING,
.help = "Unix socket path to connect to",
},
{
.name = "export",
.type = QEMU_OPT_STRING,
.help = "Name of the NBD export to open",
},
{
.name = "tls-creds",
.type = QEMU_OPT_STRING,
.help = "ID of the TLS credentials to use",
},
},
};
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVNBDState *s = bs->opaque;
char *export = NULL;
QemuOpts *opts = NULL;
Error *local_err = NULL;
QIOChannelSocket *sioc = NULL;
SocketAddress *saddr;
const char *tlscredsid;
SocketAddress *saddr = NULL;
QCryptoTLSCreds *tlscreds = NULL;
const char *hostname = NULL;
int ret = -EINVAL;
opts = qemu_opts_create(&nbd_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto error;
}
/* Pop the config into our state object. Exit if invalid. */
saddr = nbd_config(s, options, &export, errp);
saddr = nbd_config(s, opts, errp);
if (!saddr) {
goto error;
}
tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
if (tlscredsid) {
qdict_del(options, "tls-creds");
tlscreds = nbd_get_tls_creds(tlscredsid, errp);
s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
if (s->tlscredsid) {
tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
if (!tlscreds) {
goto error;
}
@@ -335,7 +374,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
}
/* NBD handshake */
ret = nbd_client_init(bs, sioc, export,
ret = nbd_client_init(bs, sioc, s->export,
tlscreds, hostname, errp);
error:
if (sioc) {
@@ -344,17 +383,18 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
if (tlscreds) {
object_unref(OBJECT(tlscreds));
}
if (ret < 0) {
g_free(s->path);
g_free(s->host);
g_free(s->port);
g_free(s->export);
g_free(s->tlscredsid);
}
qapi_free_SocketAddress(saddr);
g_free(export);
qemu_opts_del(opts);
return ret;
}
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
}
static int nbd_co_flush(BlockDriverState *bs)
{
return nbd_client_co_flush(bs);
@@ -362,19 +402,21 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
}
static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
return nbd_client_co_discard(bs, sector_num, nb_sectors);
bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}
static void nbd_close(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
nbd_client_close(bs);
g_free(s->path);
g_free(s->host);
g_free(s->port);
g_free(s->export);
g_free(s->tlscredsid);
}
static int64_t nbd_getlength(BlockDriverState *bs)
@@ -397,48 +439,45 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVNBDState *s = bs->opaque;
QDict *opts = qdict_new();
const char *path = qdict_get_try_str(options, "path");
const char *host = qdict_get_try_str(options, "host");
const char *port = qdict_get_try_str(options, "port");
const char *export = qdict_get_try_str(options, "export");
const char *tlscreds = qdict_get_try_str(options, "tls-creds");
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
if (path && export) {
if (s->path && s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd+unix:///%s?socket=%s", export, path);
} else if (path && !export) {
"nbd+unix:///%s?socket=%s", s->export, s->path);
} else if (s->path && !s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd+unix://?socket=%s", path);
} else if (!path && export && port) {
"nbd+unix://?socket=%s", s->path);
} else if (!s->path && s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s:%s/%s", host, port, export);
} else if (!path && export && !port) {
"nbd://%s:%s/%s", s->host, s->port, s->export);
} else if (!s->path && s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s/%s", host, export);
} else if (!path && !export && port) {
"nbd://%s/%s", s->host, s->export);
} else if (!s->path && !s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s:%s", host, port);
} else if (!path && !export && !port) {
"nbd://%s:%s", s->host, s->port);
} else if (!s->path && !s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"nbd://%s", host);
"nbd://%s", s->host);
}
if (path) {
qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
} else if (port) {
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
if (s->path) {
qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
} else if (s->port) {
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
} else {
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
}
if (export) {
qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
if (s->export) {
qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
}
if (tlscreds) {
qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
if (s->tlscredsid) {
qdict_put_obj(opts, "tls-creds",
QOBJECT(qstring_from_str(s->tlscredsid)));
}
bs->full_open_options = opts;
@@ -450,11 +489,11 @@ static BlockDriver bdrv_nbd = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
@@ -468,11 +507,11 @@ static BlockDriver bdrv_nbd_tcp = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
@@ -486,11 +525,11 @@ static BlockDriver bdrv_nbd_unix = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,

View File

@@ -104,7 +104,7 @@ static void nfs_co_generic_bh_cb(void *opaque)
NFSRPC *task = opaque;
task->complete = 1;
qemu_bh_delete(task->bh);
qemu_coroutine_enter(task->co, NULL);
qemu_coroutine_enter(task->co);
}
static void

View File

@@ -43,6 +43,7 @@
#define HEADER_MAGIC2 "WithouFreSpacExt"
#define HEADER_VERSION 2
#define HEADER_INUSE_MAGIC (0x746F6E59)
#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
@@ -210,7 +211,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
int ret;
space += s->prealloc_size;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
ret = bdrv_pwrite_zeroes(bs->file->bs,
ret = bdrv_pwrite_zeroes(bs->file,
s->data_end << BDRV_SECTOR_BITS,
space << BDRV_SECTOR_BITS, 0);
} else {
@@ -250,7 +251,7 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
if (off + to_write > s->header_size) {
to_write = s->header_size - off;
}
ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
to_write);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
@@ -311,7 +312,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -351,7 +352,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -432,7 +433,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
}
if (flush_bat) {
ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -475,6 +476,10 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
BDRV_SECTOR_SIZE);
cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
error_propagate(errp, local_err);
return -E2BIG;
}
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -563,7 +568,7 @@ static int parallels_update_header(BlockDriverState *bs)
if (size > s->header_size) {
size = s->header_size;
}
return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
return bdrv_pwrite_sync(bs->file, 0, s->header, size);
}
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -576,7 +581,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
}
@@ -631,7 +636,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->header_size = size;
}
ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
if (ret < 0) {
goto fail;
}

View File

@@ -690,16 +690,15 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
ImageInfoSpecific *info_spec)
{
QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
Visitor *v = qmp_output_visitor_new(&obj);
visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
&error_abort);
obj = qmp_output_get_qobject(ov);
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
visit_complete(v, &obj);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
qmp_output_visitor_cleanup(ov);
visit_free(v);
}
void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,

View File

@@ -105,7 +105,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
QCowHeader header;
ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
}
@@ -174,7 +174,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
bs->encrypted = 1;
bs->encrypted = true;
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
@@ -208,7 +208,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -239,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
goto fail;
@@ -390,7 +390,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
tmp = cpu_to_be64(l2_offset);
if (bdrv_pwrite_sync(bs->file->bs,
if (bdrv_pwrite_sync(bs->file,
s->l1_table_offset + l1_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
@@ -420,11 +420,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l2_table = s->l2_cache + (min_index << s->l2_bits);
if (new_l2_table) {
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
if (bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
@@ -450,7 +450,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
s->cluster_size) !=
s->cluster_size)
return -1;
@@ -480,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
errno = EIO;
return -1;
}
if (bdrv_pwrite(bs->file->bs,
if (bdrv_pwrite(bs->file,
cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
@@ -495,7 +495,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
}
@@ -565,7 +565,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
if (s->cluster_cache_offset != coffset) {
csize = cluster_offset >> (63 - s->cluster_bits);
csize &= (s->cluster_size - 1);
ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
if (ret != csize)
return -1;
if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -619,8 +619,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->backing->bs, sector_num,
n, &hd_qiov);
ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
@@ -644,7 +643,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->file->bs,
ret = bdrv_co_readv(bs->file,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -746,7 +745,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_writev(bs->file->bs,
ret = bdrv_co_writev(bs->file,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -900,7 +899,7 @@ static int qcow_make_empty(BlockDriverState *bs)
int ret;
memset(s->l1_table, 0, l1_length);
if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
@@ -916,32 +915,32 @@ static int qcow_make_empty(BlockDriverState *bs)
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
static coroutine_fn int
qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
QEMUIOVector hd_qiov;
struct iovec iov;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
uint8_t *buf, *out_buf;
uint64_t cluster_offset;
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
buf = qemu_blockalign(bs, s->cluster_size);
if (bytes != s->cluster_size) {
if (bytes > s->cluster_size ||
offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
{
qemu_vfree(buf);
return -EINVAL;
}
return ret;
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -970,27 +969,35 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
} else {
cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
out_len, 0, 0);
if (cluster_offset == 0) {
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS,
bytes >> BDRV_SECTOR_BITS, qiov);
if (ret < 0) {
goto fail;
}
goto success;
}
qemu_co_mutex_lock(&s->lock);
cluster_offset = get_cluster_offset(bs, offset, 2, out_len, 0, 0);
qemu_co_mutex_unlock(&s->lock);
if (cluster_offset == 0) {
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
iov = (struct iovec) {
.iov_base = out_buf,
.iov_len = out_len,
};
qemu_iovec_init_external(&hd_qiov, &iov, 1);
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
if (ret < 0) {
goto fail;
}
success:
ret = 0;
fail:
qemu_vfree(buf);
g_free(out_buf);
return ret;
}
@@ -1043,7 +1050,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
.bdrv_write_compressed = qcow_write_compressed,
.bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
.bdrv_get_info = qcow_get_info,
.create_opts = &qcow_create_opts,

View File

@@ -210,7 +210,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
ret = bdrv_pwrite(bs->file, c->entries[i].offset,
qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
if (ret < 0) {
return ret;
@@ -357,7 +357,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
ret = bdrv_pread(bs->file->bs, offset,
ret = bdrv_pread(bs->file, offset,
qcow2_cache_get_table_addr(bs, c, i),
s->cluster_size);
if (ret < 0) {

View File

@@ -65,7 +65,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
}
}
if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
if (new_l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
return -EFBIG;
}
@@ -108,7 +109,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
new_l1_table, new_l1_size2);
if (ret < 0)
goto fail;
@@ -117,9 +118,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
/* set new table */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
stl_be_p(data, new_l1_size);
stq_be_p(data + 4, new_l1_table_offset);
ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
data, sizeof(data));
if (ret < 0) {
goto fail;
@@ -185,7 +186,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
ret = bdrv_pwrite_sync(bs->file->bs,
ret = bdrv_pwrite_sync(bs->file,
s->l1_table_offset + 8 * l1_start_index,
buf, sizeof(buf));
if (ret < 0) {
@@ -446,7 +447,7 @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_pwritev(bs->file->bs, cluster_offset + offset_in_cluster,
ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
bytes, &qiov, 0);
if (ret < 0) {
goto out;
@@ -482,8 +483,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
unsigned int offset_in_cluster, nb_clusters;
uint64_t bytes_available, bytes_needed;
unsigned int offset_in_cluster;
uint64_t bytes_available, bytes_needed, nb_clusters;
int ret;
offset_in_cluster = offset_into_cluster(s, offset);
@@ -499,7 +500,6 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
if (bytes_needed > bytes_available) {
bytes_needed = bytes_available;
}
assert(bytes_needed <= INT_MAX);
*cluster_offset = 0;
@@ -536,8 +536,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
/* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
nb_clusters = size_to_clusters(s, bytes_needed);
/* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
* integers; the minimum cluster size is 512, so this assertion is always
* true */
assert(nb_clusters <= INT_MAX);
ret = qcow2_get_cluster_type(*cluster_offset);
switch (ret) {
@@ -584,13 +587,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
bytes_available = (c * s->cluster_size);
bytes_available = (int64_t)c * s->cluster_size;
out:
if (bytes_available > bytes_needed) {
bytes_available = bytes_needed;
}
/* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
* subtracting offset_in_cluster will therefore definitely yield something
* not exceeding UINT_MAX */
assert(bytes_available - offset_in_cluster <= UINT_MAX);
*bytes = bytes_available - offset_in_cluster;
return ret;
@@ -1408,7 +1415,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
nb_csectors);
if (ret < 0) {
return ret;
@@ -1677,7 +1684,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
(void **)&l2_table);
} else {
/* load inactive L2 tables from disk */
ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
(void *)l2_table, s->cluster_sectors);
}
if (ret < 0) {
@@ -1752,7 +1759,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
ret = bdrv_pwrite_zeroes(bs->file->bs, offset, s->cluster_size, 0);
ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1784,7 +1791,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
(void *)l2_table, s->cluster_sectors);
if (ret < 0) {
goto fail;
@@ -1859,7 +1866,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
ret = bdrv_read(bs->file->bs,
ret = bdrv_read(bs->file,
s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
(void *)l1_table, l1_sectors);
if (ret < 0) {

View File

@@ -104,7 +104,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
ret = bdrv_pread(bs->file, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
if (ret < 0) {
goto fail;
@@ -431,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
if (refcount_table_index < s->refcount_table_size) {
uint64_t data64 = cpu_to_be64(new_block);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
ret = bdrv_pwrite_sync(bs->file->bs,
ret = bdrv_pwrite_sync(bs->file,
s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
&data64, sizeof(data64));
if (ret < 0) {
@@ -533,7 +533,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Write refcount blocks to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
blocks_clusters * s->cluster_size);
g_free(new_blocks);
new_blocks = NULL;
@@ -547,7 +547,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
table_size * sizeof(uint64_t));
if (ret < 0) {
goto fail_table;
@@ -562,10 +562,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint64_t d64;
uint32_t d32;
} data;
cpu_to_be64w(&data.d64, table_offset);
cpu_to_be32w(&data.d32, table_clusters);
data.d64 = cpu_to_be64(table_offset);
data.d32 = cpu_to_be32(table_clusters);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
ret = bdrv_pwrite_sync(bs->file->bs,
ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
&data, sizeof(data));
if (ret < 0) {
@@ -615,9 +615,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
/* Discard is optional, ignore the return value */
if (ret >= 0) {
bdrv_discard(bs->file->bs,
d->offset >> BDRV_SECTOR_BITS,
d->bytes >> BDRV_SECTOR_BITS);
bdrv_pdiscard(bs->file->bs, d->offset, d->bytes);
}
g_free(d);
@@ -1070,7 +1068,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
l1_allocated = true;
ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
goto fail;
}
@@ -1223,7 +1221,7 @@ fail:
cpu_to_be64s(&l1_table[i]);
}
ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
l1_table, l1_size2);
for (i = 0; i < l1_size; i++) {
@@ -1382,7 +1380,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
l2_size = s->l2_size * sizeof(uint64_t);
l2_table = g_malloc(l2_size);
ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
res->check_errors++;
@@ -1514,7 +1512,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
res->check_errors++;
goto fail;
}
ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
res->check_errors++;
@@ -1612,7 +1610,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
}
}
ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
ret = bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1664,7 +1662,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
goto fail;
}
ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
@@ -2098,7 +2096,7 @@ write_refblocks:
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
on_disk_refblock, s->cluster_sectors);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2147,7 +2145,7 @@ write_refblocks:
}
assert(reftable_size < INT_MAX / sizeof(uint64_t));
ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
reftable_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2155,12 +2153,11 @@ write_refblocks:
}
/* Enter new reftable into the image header */
cpu_to_be64w(&reftable_offset_and_clusters.reftable_offset,
reftable_offset);
cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
size_to_clusters(s, reftable_size * sizeof(uint64_t)));
ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
refcount_table_offset),
reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
reftable_offset_and_clusters.reftable_clusters =
cpu_to_be32(size_to_clusters(s, reftable_size * sizeof(uint64_t)));
ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
&reftable_offset_and_clusters,
sizeof(reftable_offset_and_clusters));
if (ret < 0) {
@@ -2407,7 +2404,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
return -ENOMEM;
}
ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);
return ret;
@@ -2560,7 +2557,7 @@ static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
return ret;
}
ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size);
ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to write refblock");
return ret;
@@ -2830,7 +2827,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
cpu_to_be64s(&new_reftable[i]);
}
ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable,
ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
new_reftable_size * sizeof(uint64_t));
for (i = 0; i < new_reftable_size; i++) {

View File

@@ -67,7 +67,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
@@ -86,7 +86,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
name_size = be16_to_cpu(h.name_size);
/* Read extra data */
ret = bdrv_pread(bs->file->bs, offset, &extra,
ret = bdrv_pread(bs->file, offset, &extra,
MIN(sizeof(extra), extra_data_size));
if (ret < 0) {
goto fail;
@@ -105,7 +105,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
@@ -114,7 +114,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
ret = bdrv_pread(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -217,25 +217,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
if (ret < 0) {
goto fail;
}
offset += sizeof(extra);
ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -257,7 +257,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
&header_data, sizeof(header_data));
if (ret < 0) {
goto fail;
@@ -399,7 +399,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
}
ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -512,7 +512,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
ret = bdrv_pread(bs->file, sn->l1_table_offset,
sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
@@ -530,7 +530,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
goto fail;
@@ -716,7 +716,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -ENOMEM;
}
ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
ret = bdrv_pread(bs->file, sn->l1_table_offset,
new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");

View File

@@ -107,7 +107,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
printf("attempting to read extended header in offset %lu\n", offset);
#endif
ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
if (ret < 0) {
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
"pread fail from offset %" PRIu64, offset);
@@ -135,7 +135,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
sizeof(bs->backing_format));
return 2;
}
ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
"Could not read format name");
@@ -151,7 +151,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
if (p_feature_table != NULL) {
void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
"Could not read table");
@@ -172,7 +172,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uext->len = ext.len;
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
"Could not read data");
@@ -249,7 +249,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
}
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
&val, sizeof(val));
if (ret < 0) {
return ret;
@@ -817,7 +817,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
uint64_t ext_end;
uint64_t l1_vm_state_index;
ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read qcow2 header");
goto fail;
@@ -892,7 +892,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (header.header_length > sizeof(header)) {
s->unknown_header_fields_size = header.header_length - sizeof(header);
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
s->unknown_header_fields_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -980,10 +980,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
bs->encrypted = 1;
/* Encryption works on a sector granularity */
bs->request_alignment = BDRV_SECTOR_SIZE;
bs->encrypted = true;
}
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1069,7 +1066,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -1125,7 +1122,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -1202,6 +1199,10 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
if (bs->encrypted) {
/* Encryption works on a sector granularity */
bs->bl.request_alignment = BDRV_SECTOR_SIZE;
}
bs->bl.pwrite_zeroes_alignment = s->cluster_size;
}
@@ -1442,7 +1443,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
ret = bdrv_co_preadv(bs->backing, offset, n1,
&local_qiov, 0);
qemu_co_mutex_lock(&s->lock);
@@ -1505,7 +1506,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_preadv(bs->file->bs,
ret = bdrv_co_preadv(bs->file,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
@@ -1636,7 +1637,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
cluster_offset + offset_in_cluster);
ret = bdrv_co_pwritev(bs->file->bs,
ret = bdrv_co_pwritev(bs->file,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
@@ -1975,7 +1976,7 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Write the new header */
ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -2058,7 +2059,7 @@ static int preallocate(BlockDriverState *bs)
*/
if (host_offset != 0) {
uint8_t data = 0;
ret = bdrv_pwrite(bs->file->bs, (host_offset + cur_bytes) - 1,
ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
&data, 1);
if (ret < 0) {
return ret;
@@ -2478,15 +2479,15 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
return ret;
}
static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
int ret;
BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors, QCOW2_DISCARD_REQUEST, false);
ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
QCOW2_DISCARD_REQUEST, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -2522,7 +2523,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
/* write updated header.size */
offset = cpu_to_be64(offset);
ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
&offset, sizeof(uint64_t));
if (ret < 0) {
return ret;
@@ -2534,39 +2535,39 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
static coroutine_fn int
qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
BDRVQcow2State *s = bs->opaque;
QEMUIOVector hd_qiov;
struct iovec iov;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
uint8_t *buf, *out_buf;
uint64_t cluster_offset;
if (nb_sectors == 0) {
if (bytes == 0) {
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file->bs);
return bdrv_truncate(bs->file->bs, cluster_offset);
}
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow2_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
buf = qemu_blockalign(bs, s->cluster_size);
if (bytes != s->cluster_size) {
if (bytes > s->cluster_size ||
offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
{
qemu_vfree(buf);
return -EINVAL;
}
return ret;
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
qemu_iovec_to_buf(qiov, 0, buf, bytes);
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -2595,33 +2596,44 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
} else {
cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
sector_num << 9, out_len);
if (!cluster_offset) {
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
if (ret < 0) {
goto fail;
}
goto success;
}
qemu_co_mutex_lock(&s->lock);
cluster_offset =
qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
if (!cluster_offset) {
qemu_co_mutex_unlock(&s->lock);
ret = -EIO;
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
goto fail;
}
iov = (struct iovec) {
.iov_base = out_buf,
.iov_len = out_len,
};
qemu_iovec_init_external(&hd_qiov, &iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
if (ret < 0) {
goto fail;
}
success:
ret = 0;
fail:
qemu_vfree(buf);
g_free(out_buf);
return ret;
}
@@ -2663,7 +2675,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* After this call, neither the in-memory nor the on-disk refcount
* information accurately describe the actual references */
ret = bdrv_pwrite_zeroes(bs->file->bs, s->l1_table_offset,
ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
l1_clusters * s->cluster_size, 0);
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2677,7 +2689,7 @@ static int make_completely_empty(BlockDriverState *bs)
* overwrite parts of the existing refcount and L1 table, which is not
* an issue because the dirty flag is set, complete data loss is in fact
* desired and partial data loss is consequently fine as well */
ret = bdrv_pwrite_zeroes(bs->file->bs, s->cluster_size,
ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
(2 + l1_clusters) * s->cluster_size, 0);
/* This call (even if it failed overall) may have overwritten on-disk
* refcount structures; in that case, the in-memory refcount information
@@ -2693,10 +2705,10 @@ static int make_completely_empty(BlockDriverState *bs)
/* "Create" an empty reftable (one cluster) directly after the image
* header and an empty L1 table three clusters after the image header;
* the cluster between those two will be used as the first refblock */
cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2727,7 +2739,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* Enter the first refblock into the reftable */
rt_entry = cpu_to_be64(2 * s->cluster_size);
ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
&rt_entry, sizeof(rt_entry));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -3364,9 +3376,9 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_co_pdiscard = qcow2_co_pdiscard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
.bdrv_make_empty = qcow2_make_empty,
.bdrv_snapshot_create = qcow2_snapshot_create,

View File

@@ -65,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
}
@@ -154,7 +154,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
/* Adjust for offset into table */
offset += start * sizeof(uint64_t);
bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);

View File

@@ -86,7 +86,7 @@ int qed_write_header_sync(BDRVQEDState *s)
int ret;
qed_header_cpu_to_le(&s->header, &le);
ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
if (ret != sizeof(le)) {
return ret;
}
@@ -123,7 +123,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
/* Update header */
qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
write_header_cb->nsectors, qed_write_header_cb,
write_header_cb);
}
@@ -155,7 +155,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
write_header_cb->iov.iov_len = len;
qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
qed_write_header_read_cb, write_header_cb);
}
@@ -218,7 +218,7 @@ static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
*
* The string is NUL-terminated.
*/
static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
char *buf, size_t buflen)
{
int ret;
@@ -389,7 +389,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
s->bs = bs;
QSIMPLEQ_INIT(&s->allocating_write_reqs);
ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
if (ret < 0) {
return ret;
}
@@ -446,7 +446,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
ret = qed_read_string(bs->file, s->header.backing_filename_offset,
s->header.backing_filename_size, bs->backing_file,
sizeof(bs->backing_file));
if (ret < 0) {
@@ -708,7 +708,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
}
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
qemu_coroutine_enter(cb->co);
}
}
@@ -800,7 +800,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE,
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
@@ -837,7 +837,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
&copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_copy_from_backing_file_cb, copy_cb);
}
@@ -1087,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
next_fn, acb);
}
@@ -1319,7 +1319,7 @@ static void qed_aio_read_data(void *opaque, int ret,
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
return;
@@ -1425,7 +1425,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
cb->done = true;
cb->ret = ret;
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
qemu_coroutine_enter(cb->co);
}
}
@@ -1575,7 +1575,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
}
/* Write new header */
ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
g_free(buffer);
if (ret == 0) {
memcpy(&s->header, &new_header, sizeof(new_header));

View File

@@ -383,7 +383,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
bdrv_aio_writev(s->children[item->index], acb->sector_num,
acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
acb);
}
@@ -660,7 +660,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
&acb->qcrs[i].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[i]);
}
@@ -678,7 +678,7 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
acb->qcrs[acb->child_iter].aiocb =
bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num,
&acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[acb->child_iter]);
@@ -719,7 +719,7 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
qiov, nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}

View File

@@ -32,7 +32,7 @@
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
#include "raw-aio.h"
#include "block/raw-aio.h"
#include "qapi/util.h"
#include "qapi/qmp/qstring.h"
@@ -137,10 +137,6 @@ typedef struct BDRVRawState {
int open_flags;
size_t buf_align;
#ifdef CONFIG_LINUX_AIO
int use_aio;
LinuxAioState *aio_ctx;
#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
#endif
@@ -154,9 +150,6 @@ typedef struct BDRVRawState {
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
#ifdef CONFIG_LINUX_AIO
int use_aio;
#endif
} BDRVRawReopenState;
static int fd_open(BlockDriverState *bs);
@@ -302,22 +295,22 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
/* For SCSI generic devices the alignment is not really used.
With buffered I/O, we don't have any restrictions. */
if (bdrv_is_sg(bs) || !s->needs_alignment) {
bs->request_alignment = 1;
bs->bl.request_alignment = 1;
s->buf_align = 1;
return;
}
bs->request_alignment = 0;
bs->bl.request_alignment = 0;
s->buf_align = 0;
/* Let's try to use the logical blocksize for the alignment. */
if (probe_logical_blocksize(fd, &bs->request_alignment) < 0) {
bs->request_alignment = 0;
if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) {
bs->bl.request_alignment = 0;
}
#ifdef CONFIG_XFS
if (s->is_xfs) {
struct dioattr da;
if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
bs->request_alignment = da.d_miniosz;
bs->bl.request_alignment = da.d_miniosz;
/* The kernel returns wrong information for d_mem */
/* s->buf_align = da.d_mem; */
}
@@ -337,21 +330,21 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
qemu_vfree(buf);
}
if (!bs->request_alignment) {
if (!bs->bl.request_alignment) {
size_t align;
buf = qemu_memalign(s->buf_align, max_align);
for (align = 512; align <= max_align; align <<= 1) {
if (raw_is_io_aligned(fd, buf, align)) {
bs->request_alignment = align;
bs->bl.request_alignment = align;
break;
}
}
qemu_vfree(buf);
}
if (!s->buf_align || !bs->request_alignment) {
error_setg(errp, "Could not find working O_DIRECT alignment. "
"Try cache.direct=off.");
if (!s->buf_align || !bs->bl.request_alignment) {
error_setg(errp, "Could not find working O_DIRECT alignment");
error_append_hint(errp, "Try cache.direct=off\n");
}
}
@@ -374,58 +367,15 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
static void raw_detach_aio_context(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_aio) {
laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
}
#endif
}
static void raw_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
static bool raw_use_aio(int bdrv_flags)
{
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_aio) {
laio_attach_aio_context(s->aio_ctx, new_context);
}
#endif
}
#ifdef CONFIG_LINUX_AIO
static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
{
int ret = -1;
assert(aio_ctx != NULL);
assert(use_aio != NULL);
/*
* Currently Linux do AIO only for files opened with O_DIRECT
* specified so check NOCACHE flag too
*/
if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
(BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
/* if non-NULL, laio_init() has already been run */
if (*aio_ctx == NULL) {
*aio_ctx = laio_init();
if (!*aio_ctx) {
goto error;
}
}
*use_aio = 1;
} else {
*use_aio = 0;
}
ret = 0;
error:
return ret;
return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
(BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
}
#endif
@@ -494,13 +444,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
qemu_close(fd);
ret = -errno;
error_setg_errno(errp, -ret, "Could not set AIO state");
goto fail;
}
if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
error_setg(errp, "aio=native was specified, but it requires "
"cache.direct=on, which was not specified.");
ret = -EINVAL;
@@ -567,8 +511,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -603,18 +545,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
state->opaque = g_new0(BDRVRawReopenState, 1);
raw_s = state->opaque;
#ifdef CONFIG_LINUX_AIO
raw_s->use_aio = s->use_aio;
/* we can use s->aio_ctx instead of a copy, because the use_aio flag is
* valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
* won't override aio_ctx if aio_ctx is non-NULL */
if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
error_setg(errp, "Could not set AIO state");
return -1;
}
#endif
if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -639,15 +569,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
/* dup the original fd */
/* TODO: use qemu fcntl wrapper */
#ifdef F_DUPFD_CLOEXEC
raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
#else
raw_s->fd = dup(s->fd);
if (raw_s->fd != -1) {
qemu_set_cloexec(raw_s->fd);
}
#endif
raw_s->fd = qemu_dup(s->fd);
if (raw_s->fd >= 0) {
ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
if (ret) {
@@ -697,9 +619,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
qemu_close(s->fd);
s->fd = raw_s->fd;
#ifdef CONFIG_LINUX_AIO
s->use_aio = raw_s->use_aio;
#endif
g_free(state->opaque);
state->opaque = NULL;
@@ -745,8 +664,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
if (!fstat(s->fd, &st)) {
if (S_ISBLK(st.st_mode)) {
int ret = hdev_get_max_transfer_length(s->fd);
if (ret >= 0) {
bs->bl.max_transfer_length = ret;
if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
}
}
}
@@ -1295,7 +1214,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1305,8 +1224,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
acb->aio_nbytes = count;
acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
@@ -1314,7 +1233,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
assert(qiov->size == acb->aio_nbytes);
}
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -1337,9 +1256,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
} else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
}
}
@@ -1365,9 +1285,9 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
static void raw_aio_plug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_aio) {
laio_io_plug(bs, s->aio_ctx);
if (bs->open_flags & BDRV_O_NATIVE_AIO) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_plug(bs, aio);
}
#endif
}
@@ -1375,9 +1295,9 @@ static void raw_aio_plug(BlockDriverState *bs)
static void raw_aio_unplug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_aio) {
laio_io_unplug(bs, s->aio_ctx);
if (bs->open_flags & BDRV_O_NATIVE_AIO) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_unplug(bs, aio);
}
#endif
}
@@ -1397,13 +1317,6 @@ static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
raw_detach_aio_context(bs);
#ifdef CONFIG_LINUX_AIO
if (s->use_aio) {
laio_cleanup(s->aio_ctx);
}
#endif
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
@@ -1873,13 +1786,13 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
return ret | BDRV_BLOCK_OFFSET_VALID | start;
}
static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD);
}
@@ -1951,7 +1864,7 @@ BlockDriver bdrv_file = {
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_aio_pdiscard = raw_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
@@ -1962,9 +1875,6 @@ BlockDriver bdrv_file = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_detach_aio_context = raw_detach_aio_context,
.bdrv_attach_aio_context = raw_attach_aio_context,
.create_opts = &raw_create_opts,
};
@@ -2293,8 +2203,8 @@ static int fd_open(BlockDriverState *bs)
return -EIO;
}
static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -2302,7 +2212,7 @@ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
if (fd_open(bs) < 0) {
return NULL;
}
return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
@@ -2397,7 +2307,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_aio_pdiscard = hdev_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
@@ -2410,9 +2320,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_probe_blocksizes = hdev_probe_blocksizes,
.bdrv_probe_geometry = hdev_probe_geometry,
.bdrv_detach_aio_context = raw_detach_aio_context,
.bdrv_attach_aio_context = raw_attach_aio_context,
/* generic scsi device */
#ifdef __linux__
.bdrv_aio_ioctl = hdev_aio_ioctl,
@@ -2532,9 +2439,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_detach_aio_context = raw_detach_aio_context,
.bdrv_attach_aio_context = raw_attach_aio_context,
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2665,9 +2569,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_detach_aio_context = raw_detach_aio_context,
.bdrv_attach_aio_context = raw_attach_aio_context,
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,

View File

@@ -27,7 +27,7 @@
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "raw-aio.h"
#include "block/raw-aio.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
@@ -142,7 +142,7 @@ static int aio_worker(void *arg)
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
@@ -155,11 +155,12 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
assert(qiov->size == count);
}
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
acb->aio_nbytes = count;
acb->aio_offset = offset;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -222,7 +223,7 @@ static void raw_attach_aio_context(BlockDriverState *bs,
}
}
static void raw_probe_alignment(BlockDriverState *bs)
static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
DWORD sectorsPerCluster, freeClusters, totalClusters, count;
@@ -230,14 +231,14 @@ static void raw_probe_alignment(BlockDriverState *bs)
BOOL status;
if (s->type == FTYPE_CD) {
bs->request_alignment = 2048;
bs->bl.request_alignment = 2048;
return;
}
if (s->type == FTYPE_HARDDISK) {
status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
NULL, 0, &dg, sizeof(dg), &count, NULL);
if (status != 0) {
bs->request_alignment = dg.Geometry.BytesPerSector;
bs->bl.request_alignment = dg.Geometry.BytesPerSector;
return;
}
/* try GetDiskFreeSpace too */
@@ -247,7 +248,7 @@ static void raw_probe_alignment(BlockDriverState *bs)
GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
&dg.Geometry.BytesPerSector,
&freeClusters, &totalClusters);
bs->request_alignment = dg.Geometry.BytesPerSector;
bs->bl.request_alignment = dg.Geometry.BytesPerSector;
}
}
@@ -365,7 +366,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}
raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
@@ -379,9 +379,10 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
nb_sectors, cb, opaque, QEMU_AIO_READ);
nb_sectors, cb, opaque, QEMU_AIO_READ);
} else {
return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_READ);
}
}
@@ -393,9 +394,10 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
nb_sectors, cb, opaque, QEMU_AIO_WRITE);
nb_sectors, cb, opaque, QEMU_AIO_WRITE);
} else {
return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_WRITE);
}
}
@@ -550,6 +552,7 @@ BlockDriver bdrv_file = {
.bdrv_needs_filename = true,
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
.bdrv_refresh_limits = raw_probe_alignment,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,

View File

@@ -1,6 +1,6 @@
/* BlockDriver implementation for "raw"
*
* Copyright (C) 2010, 2013, Red Hat, Inc.
* Copyright (C) 2010-2016 Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
*
@@ -50,33 +50,30 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
static int coroutine_fn
raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov, int flags)
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
void *buf = NULL;
BlockDriver *drv;
QEMUIOVector local_qiov;
int ret;
if (bs->probed && sector_num == 0) {
/* As long as these conditions are true, we can't get partial writes to
* the probe buffer and can just directly check the request. */
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
/* Handling partial writes would be a pain - so we just
* require that guests have 512-byte request alignment if
* probing occurred */
QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
if (nb_sectors == 0) {
/* qemu_iovec_to_buf() would fail, but we want to return success
* instead of -EINVAL in this case. */
return 0;
}
assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
buf = qemu_try_blockalign(bs->file->bs, 512);
if (!buf) {
@@ -105,8 +102,7 @@ raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = bdrv_co_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
fail:
if (qiov == &local_qiov) {
@@ -131,13 +127,13 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count,
BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->file->bs, offset, count, flags);
return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
return bdrv_co_pdiscard(bs->file->bs, offset, count);
}
static int64_t raw_getlength(BlockDriverState *bs)
@@ -152,7 +148,12 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl = bs->file->bs->bl;
if (bs->probed) {
/* To make it easier to protect the first sector, any probed
* image is restricted to read-modify-write on sub-sector
* operations. */
bs->bl.request_alignment = BDRV_SECTOR_SIZE;
}
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -197,8 +198,10 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA;
bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags;
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
@@ -243,10 +246,10 @@ BlockDriver bdrv_raw = {
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev_flags = &raw_co_writev_flags,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_pdiscard = &raw_co_pdiscard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,

View File

@@ -649,9 +649,9 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
}
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
int64_t sector_num,
int64_t off,
QEMUIOVector *qiov,
int nb_sectors,
int64_t size,
BlockCompletionFunc *cb,
void *opaque,
RBDAIOCmd cmd)
@@ -659,7 +659,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
int64_t off, size;
char *buf;
int r;
@@ -668,6 +667,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
@@ -687,9 +687,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
buf = acb->bounce;
off = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
rcb->buf = buf;
@@ -739,7 +736,8 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_READ);
}
@@ -750,7 +748,8 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_WRITE);
}
@@ -931,13 +930,13 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
}
#ifdef LIBRBD_SUPPORTS_DISCARD
static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
BlockCompletionFunc *cb,
void *opaque)
static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
int64_t offset,
int count,
BlockCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
RBD_AIO_DISCARD);
}
#endif
@@ -1001,7 +1000,7 @@ static BlockDriver bdrv_rbd = {
#endif
#ifdef LIBRBD_SUPPORTS_DISCARD
.bdrv_aio_discard = qemu_rbd_aio_discard,
.bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,

View File

@@ -495,7 +495,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
{
qemu_coroutine_enter(acb->coroutine, NULL);
qemu_coroutine_enter(acb->coroutine);
qemu_aio_unref(acb);
}
@@ -636,7 +636,7 @@ static void restart_co_req(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co, NULL);
qemu_coroutine_enter(co);
}
typedef struct SheepdogReqCo {
@@ -726,8 +726,8 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
if (qemu_in_coroutine()) {
do_co_req(&srco);
} else {
co = qemu_coroutine_create(do_co_req);
qemu_coroutine_enter(co, &srco);
co = qemu_coroutine_create(do_co_req, &srco);
qemu_coroutine_enter(co);
while (!srco.finished) {
aio_poll(aio_context, true);
}
@@ -925,17 +925,17 @@ static void co_read_response(void *opaque)
BDRVSheepdogState *s = opaque;
if (!s->co_recv) {
s->co_recv = qemu_coroutine_create(aio_read_response);
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
qemu_coroutine_enter(s->co_recv, opaque);
qemu_coroutine_enter(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
qemu_coroutine_enter(s->co_send, NULL);
qemu_coroutine_enter(s->co_send);
}
/*
@@ -2800,8 +2800,8 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
}
static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
{
SheepdogAIOCB *acb;
BDRVSheepdogState *s = bs->opaque;
@@ -2811,7 +2811,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
uint32_t zero = 0;
if (!s->discard_supported) {
return 0;
return 0;
}
memset(&discard_iov, 0, sizeof(discard_iov));
@@ -2820,7 +2820,10 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
iov.iov_len = sizeof(zero);
discard_iov.iov = &iov;
discard_iov.niov = 1;
acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
@@ -2954,7 +2957,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -2990,7 +2993,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -3026,7 +3029,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,

View File

@@ -508,36 +508,73 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
return ret;
}
static QemuOptsList ssh_runtime_opts = {
.name = "ssh",
.head = QTAILQ_HEAD_INITIALIZER(ssh_runtime_opts.head),
.desc = {
{
.name = "host",
.type = QEMU_OPT_STRING,
.help = "Host to connect to",
},
{
.name = "port",
.type = QEMU_OPT_NUMBER,
.help = "Port to connect to",
},
{
.name = "path",
.type = QEMU_OPT_STRING,
.help = "Path of the image on the host",
},
{
.name = "user",
.type = QEMU_OPT_STRING,
.help = "User as which to connect",
},
{
.name = "host_key_check",
.type = QEMU_OPT_STRING,
.help = "Defines how and what to check the host key against",
},
},
};
static int connect_to_ssh(BDRVSSHState *s, QDict *options,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
QemuOpts *opts = NULL;
Error *local_err = NULL;
const char *host, *user, *path, *host_key_check;
int port;
if (!qdict_haskey(options, "host")) {
opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
ret = -EINVAL;
error_propagate(errp, local_err);
goto err;
}
host = qemu_opt_get(opts, "host");
if (!host) {
ret = -EINVAL;
error_setg(errp, "No hostname was specified");
goto err;
}
host = qdict_get_str(options, "host");
if (qdict_haskey(options, "port")) {
port = qdict_get_int(options, "port");
} else {
port = 22;
}
port = qemu_opt_get_number(opts, "port", 22);
if (!qdict_haskey(options, "path")) {
path = qemu_opt_get(opts, "path");
if (!path) {
ret = -EINVAL;
error_setg(errp, "No path was specified");
goto err;
}
path = qdict_get_str(options, "path");
if (qdict_haskey(options, "user")) {
user = qdict_get_str(options, "user");
} else {
user = qemu_opt_get(opts, "user");
if (!user) {
user = g_get_user_name();
if (!user) {
error_setg_errno(errp, errno, "Can't get user name");
@@ -546,9 +583,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
}
if (qdict_haskey(options, "host_key_check")) {
host_key_check = qdict_get_str(options, "host_key_check");
} else {
host_key_check = qemu_opt_get(opts, "host_key_check");
if (!host_key_check) {
host_key_check = "yes";
}
@@ -612,21 +648,14 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
goto err;
}
qemu_opts_del(opts);
r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
if (r < 0) {
sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
/* Delete the options we've used; any not deleted will cause the
* block layer to give an error about unused options.
*/
qdict_del(options, "host");
qdict_del(options, "port");
qdict_del(options, "user");
qdict_del(options, "path");
qdict_del(options, "host_key_check");
return 0;
err:
@@ -646,6 +675,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
s->session = NULL;
qemu_opts_del(opts);
return ret;
}
@@ -777,7 +808,7 @@ static void restart_coroutine(void *opaque)
DPRINTF("co=%p", co);
qemu_coroutine_enter(co, NULL);
qemu_coroutine_enter(co);
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)

View File

@@ -95,6 +95,7 @@ static void coroutine_fn stream_run(void *opaque)
BlockDriverState *base = s->base;
int64_t sector_num = 0;
int64_t end = -1;
uint64_t delay_ns = 0;
int error = 0;
int ret = 0;
int n = 0;
@@ -123,10 +124,8 @@ static void coroutine_fn stream_run(void *opaque)
}
for (sector_num = 0; sector_num < end; sector_num += n) {
uint64_t delay_ns = 0;
bool copy;
wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -156,12 +155,6 @@ wait:
}
trace_stream_one_iteration(s, sector_num, n, ret);
if (copy) {
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
goto wait;
}
}
ret = stream_populate(blk, sector_num, n, buf);
}
if (ret < 0) {
@@ -182,6 +175,9 @@ wait:
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
if (copy && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
}
}
if (!base) {
@@ -218,15 +214,15 @@ static const BlockJobDriver stream_job_driver = {
.set_speed = stream_set_speed,
};
void stream_start(BlockDriverState *bs, BlockDriverState *base,
const char *backing_file_str, int64_t speed,
BlockdevOnError on_error,
BlockCompletionFunc *cb,
void *opaque, Error **errp)
void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, const char *backing_file_str,
int64_t speed, BlockdevOnError on_error,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
StreamBlockJob *s;
s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
s = block_job_create(job_id, &stream_job_driver, bs, speed,
cb, opaque, errp);
if (!s) {
return;
}
@@ -235,7 +231,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
s->common.co = qemu_coroutine_create(stream_run);
s->common.co = qemu_coroutine_create(stream_run, s);
trace_stream_start(bs, base, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
qemu_coroutine_enter(s->common.co);
}

View File

@@ -1,4 +1,4 @@
# See docs/trace-events.txt for syntax documentation.
# See docs/tracing.txt for syntax documentation.
# block.c
bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
@@ -9,7 +9,7 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
# block/io.c
bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
@@ -58,7 +58,7 @@ qmp_block_stream(void *bs, void *job) "bs %p job %p"
# block/raw-win32.c
# block/raw-posix.c
paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
# block/qcow2.c
qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"

View File

@@ -403,7 +403,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
logout("\n");
ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
if (ret < 0) {
goto fail;
}
@@ -500,7 +500,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
bmap_size);
if (ret < 0) {
goto fail_free_bmap;
@@ -597,7 +597,7 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_reset(&local_qiov);
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
ret = bdrv_co_preadv(bs->file->bs, data_offset, n_bytes,
ret = bdrv_co_preadv(bs->file, data_offset, n_bytes,
&local_qiov, 0);
}
logout("%u bytes read\n", n_bytes);
@@ -670,7 +670,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
* acquire the lock and thus the padded cluster is written before
* the other coroutines can write to the affected area. */
qemu_co_mutex_lock(&s->write_lock);
ret = bdrv_pwrite(bs->file->bs, data_offset, block, s->block_size);
ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
qemu_co_mutex_unlock(&s->write_lock);
} else {
uint64_t data_offset = s->header.offset_data +
@@ -690,7 +690,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_reset(&local_qiov);
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
ret = bdrv_co_pwritev(bs->file->bs, data_offset, n_bytes,
ret = bdrv_co_pwritev(bs->file, data_offset, n_bytes,
&local_qiov, 0);
}
@@ -719,7 +719,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
assert(VDI_IS_ALLOCATED(bmap_first));
*header = s->header;
vdi_header_to_le(header);
ret = bdrv_write(bs->file->bs, 0, block, 1);
ret = bdrv_write(bs->file, 0, block, 1);
g_free(block);
block = NULL;
@@ -737,7 +737,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
ret = bdrv_write(bs->file, offset, base, n_sectors);
}
return ret;

View File

@@ -84,7 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
offset = log->offset + read;
ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
if (ret < 0) {
goto exit;
}
@@ -144,7 +144,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
}
offset = log->offset + read;
ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -194,7 +194,7 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
/* full */
break;
}
ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -466,7 +466,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
/* count is only > 1 if we are writing zeroes */
for (i = 0; i < count; i++) {
ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -945,7 +945,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
if (i == 0 && leading_length) {
/* partial sector at the front of the buffer */
ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
ret = bdrv_pread(bs->file, file_offset, merged_sector,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -955,7 +955,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
sector_write = merged_sector;
} else if (i == sectors - 1 && trailing_length) {
/* partial sector at the end of the buffer */
ret = bdrv_pread(bs->file->bs,
ret = bdrv_pread(bs->file,
file_offset,
merged_sector + trailing_length,
VHDX_LOG_SECTOR_SIZE - trailing_length);

View File

@@ -298,9 +298,10 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
* and then update the header checksum. Header is converted to proper
* endianness before being written to the specified file offset
*/
static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr,
uint64_t offset, bool read)
{
BlockDriverState *bs_file = file->bs;
uint8_t *buffer = NULL;
int ret;
VHDXHeader *header_le;
@@ -315,7 +316,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
if (read) {
/* if true, we can't assume the extra reserved bytes are 0 */
ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
ret = bdrv_pread(file, offset, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto exit;
}
@@ -329,7 +330,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
vhdx_header_le_export(hdr, header_le);
vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
offsetof(VHDXHeader, checksum));
ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
ret = bdrv_pwrite_sync(file, offset, header_le, sizeof(VHDXHeader));
exit:
qemu_vfree(buffer);
@@ -378,7 +379,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
inactive_header->log_guid = *log_guid;
}
ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
if (ret < 0) {
goto exit;
}
@@ -430,7 +431,7 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer,
VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -447,7 +448,7 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
}
ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer,
VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -521,7 +522,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
* whole block */
buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
goto fail;
@@ -634,7 +635,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
VHDX_METADATA_TABLE_MAX_SIZE);
if (ret < 0) {
goto exit;
@@ -737,7 +738,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
goto exit;
}
ret = bdrv_pread(bs->file->bs,
ret = bdrv_pread(bs->file,
s->metadata_entries.file_parameters_entry.offset
+ s->metadata_rt.file_offset,
&s->params,
@@ -772,7 +773,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
/* determine virtual disk size, logical sector size,
* and phys sector size */
ret = bdrv_pread(bs->file->bs,
ret = bdrv_pread(bs->file,
s->metadata_entries.virtual_disk_size_entry.offset
+ s->metadata_rt.file_offset,
&s->virtual_disk_size,
@@ -780,7 +781,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
ret = bdrv_pread(bs->file->bs,
ret = bdrv_pread(bs->file,
s->metadata_entries.logical_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->logical_sector_size,
@@ -788,7 +789,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
ret = bdrv_pread(bs->file->bs,
ret = bdrv_pread(bs->file,
s->metadata_entries.phys_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->physical_sector_size,
@@ -905,7 +906,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->regions);
/* validate the file signature */
ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
@@ -964,7 +965,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
goto fail;
}
@@ -1117,7 +1118,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
case PAYLOAD_BLOCK_FULLY_PRESENT:
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->file->bs,
ret = bdrv_co_readv(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sinfo.sectors_avail, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1326,7 +1327,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
}
/* block exists, so we can just overwrite it */
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_writev(bs->file->bs,
ret = bdrv_co_writev(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sectors_to_write, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1387,9 +1388,11 @@ exit:
* There are 2 headers, and the highest sequence number will represent
* the active header
*/
static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
uint32_t log_size)
{
BlockDriverState *bs = blk_bs(blk);
BdrvChild *child;
int ret = 0;
VHDXHeader *hdr = NULL;
@@ -1404,12 +1407,18 @@ static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
vhdx_guid_generate(&hdr->file_write_guid);
vhdx_guid_generate(&hdr->data_write_guid);
ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
/* XXX Ugly way to get blk->root, but that's a feature, not a bug. This
* hack makes it obvious that vhdx_write_header() bypasses the BlockBackend
* here, which it really shouldn't be doing. */
child = QLIST_FIRST(&bs->parents);
assert(!QLIST_NEXT(child, next_parent));
ret = vhdx_write_header(child, hdr, VHDX_HEADER1_OFFSET, false);
if (ret < 0) {
goto exit;
}
hdr->sequence_number++;
ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
ret = vhdx_write_header(child, hdr, VHDX_HEADER2_OFFSET, false);
if (ret < 0) {
goto exit;
}
@@ -1442,7 +1451,7 @@ exit:
* The first 64KB of the Metadata section is reserved for the metadata
* header and entries; beyond that, the metadata items themselves reside.
*/
static int vhdx_create_new_metadata(BlockDriverState *bs,
static int vhdx_create_new_metadata(BlockBackend *blk,
uint64_t image_size,
uint32_t block_size,
uint32_t sector_size,
@@ -1538,13 +1547,13 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
VHDX_META_FLAGS_IS_VIRTUAL_DISK;
vhdx_metadata_entry_le_export(&md_table_entry[4]);
ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
VHDX_METADATA_ENTRY_BUFFER_SIZE);
ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer,
VHDX_METADATA_ENTRY_BUFFER_SIZE, 0);
if (ret < 0) {
goto exit;
}
@@ -1564,7 +1573,7 @@ exit:
* Fixed images: default state of the BAT is fully populated, with
* file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
*/
static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
bool use_zero_blocks, uint64_t file_offset,
uint32_t length)
@@ -1588,12 +1597,12 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
if (type == VHDX_TYPE_DYNAMIC) {
/* All zeroes, so we can just extend the file - the end of the BAT
* is the furthest thing we have written yet */
ret = bdrv_truncate(bs, data_file_offset);
ret = blk_truncate(blk, data_file_offset);
if (ret < 0) {
goto exit;
}
} else if (type == VHDX_TYPE_FIXED) {
ret = bdrv_truncate(bs, data_file_offset + image_size);
ret = blk_truncate(blk, data_file_offset + image_size);
if (ret < 0) {
goto exit;
}
@@ -1604,7 +1613,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
if (type == VHDX_TYPE_FIXED ||
use_zero_blocks ||
bdrv_has_zero_init(bs) == 0) {
bdrv_has_zero_init(blk_bs(blk)) == 0) {
/* for a fixed file, the default BAT entry is not zero */
s->bat = g_try_malloc0(length);
if (length && s->bat == NULL) {
@@ -1620,12 +1629,12 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
sinfo.file_offset = data_file_offset +
(sector_num << s->logical_sector_size_bits);
sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
vhdx_update_bat_table_entry(blk_bs(blk), s, &sinfo, &unused, &unused,
block_state);
cpu_to_le64s(&s->bat[sinfo.bat_idx]);
sector_num += s->sectors_per_block;
}
ret = bdrv_pwrite(bs, file_offset, s->bat, length);
ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
if (ret < 0) {
goto exit;
}
@@ -1645,7 +1654,7 @@ exit:
* to create the BAT itself, we will also cause the BAT to be
* created.
*/
static int vhdx_create_new_region_table(BlockDriverState *bs,
static int vhdx_create_new_region_table(BlockBackend *blk,
uint64_t image_size,
uint32_t block_size,
uint32_t sector_size,
@@ -1720,21 +1729,21 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
/* The region table gives us the data we need to create the BAT,
* so do that now */
ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
bat_file_offset, bat_length);
if (ret < 0) {
goto exit;
}
/* Now write out the region headers to disk */
ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
@@ -1871,13 +1880,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
/* Creates (B),(C) */
ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
ret = vhdx_create_new_headers(blk, image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1885,7 +1894,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* Creates (H) */
ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;

View File

@@ -252,7 +252,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
int ret;
desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
g_free(desc);
return 0;
@@ -286,7 +286,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
desc = g_malloc0(DESC_SIZE);
tmp_desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
goto out;
}
@@ -306,7 +306,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
pstrcat(desc, DESC_SIZE, tmp_desc);
}
ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
out:
g_free(desc);
@@ -350,7 +350,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
int ret;
desc = g_malloc0(DESC_SIZE + 1);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
goto out;
}
@@ -454,7 +454,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
return -ENOMEM;
}
ret = bdrv_pread(extent->file->bs,
ret = bdrv_pread(extent->file,
extent->l1_table_offset,
extent->l1_table,
l1_size);
@@ -474,7 +474,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
ret = -ENOMEM;
goto fail_l1;
}
ret = bdrv_pread(extent->file->bs,
ret = bdrv_pread(extent->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
l1_size);
@@ -508,7 +508,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
VMDK3Header header;
VmdkExtent *extent;
ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
@@ -538,14 +538,13 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
QDict *options, Error **errp);
static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
Error **errp)
static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
{
int64_t size;
char *buf;
int ret;
size = bdrv_getlength(file);
size = bdrv_getlength(file->bs);
if (size < 0) {
error_setg_errno(errp, -size, "Could not access file");
return NULL;
@@ -586,7 +585,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
int64_t l1_backup_offset = 0;
bool compressed;
ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
@@ -596,7 +595,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (header.capacity == 0) {
uint64_t desc_offset = le64_to_cpu(header.desc_offset);
if (desc_offset) {
char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
if (!buf) {
return -EINVAL;
}
@@ -636,7 +635,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
} QEMU_PACKED eos_marker;
} QEMU_PACKED footer;
ret = bdrv_pread(file->bs,
ret = bdrv_pread(file,
bs->file->bs->total_sectors * 512 - 1536,
&footer, sizeof(footer));
if (ret < 0) {
@@ -874,7 +873,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
extent->flat_start_offset = flat_offset << 9;
} else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
/* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
char *buf = vmdk_read_desc(extent_file, 0, errp);
if (!buf) {
ret = -EINVAL;
} else {
@@ -943,7 +942,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
buf = vmdk_read_desc(bs->file->bs, 0, errp);
buf = vmdk_read_desc(bs->file, 0, errp);
if (!buf) {
return -EINVAL;
}
@@ -1046,14 +1045,14 @@ static int get_whole_cluster(BlockDriverState *bs,
/* Read backing data before skip range */
if (skip_start_bytes > 0) {
if (bs->backing) {
ret = bdrv_pread(bs->backing->bs, offset, whole_grain,
ret = bdrv_pread(bs->backing, offset, whole_grain,
skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
ret = bdrv_pwrite(extent->file->bs, cluster_offset, whole_grain,
ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain,
skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
@@ -1063,7 +1062,7 @@ static int get_whole_cluster(BlockDriverState *bs,
/* Read backing data after skip range */
if (skip_end_bytes < cluster_bytes) {
if (bs->backing) {
ret = bdrv_pread(bs->backing->bs, offset + skip_end_bytes,
ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
whole_grain + skip_end_bytes,
cluster_bytes - skip_end_bytes);
if (ret < 0) {
@@ -1071,7 +1070,7 @@ static int get_whole_cluster(BlockDriverState *bs,
goto exit;
}
}
ret = bdrv_pwrite(extent->file->bs, cluster_offset + skip_end_bytes,
ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes,
whole_grain + skip_end_bytes,
cluster_bytes - skip_end_bytes);
if (ret < 0) {
@@ -1091,8 +1090,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
{
offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file->bs,
if (bdrv_pwrite_sync(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1101,8 +1099,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
/* update backup L2 table */
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
if (bdrv_pwrite_sync(
extent->file->bs,
if (bdrv_pwrite_sync(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1191,8 +1188,7 @@ static int get_cluster_offset(BlockDriverState *bs,
}
}
l2_table = extent->l2_cache + (min_index * extent->l2_size);
if (bdrv_pread(
extent->file->bs,
if (bdrv_pread(extent->file,
(int64_t)l2_offset * 512,
l2_table,
extent->l2_size * sizeof(uint32_t)
@@ -1206,13 +1202,6 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
cluster_sector = le32_to_cpu(l2_table[l2_index]);
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
@@ -1235,6 +1224,13 @@ static int get_cluster_offset(BlockDriverState *bs,
if (ret) {
return ret;
}
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
}
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
@@ -1373,7 +1369,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
}
write_offset = cluster_offset + offset_in_cluster,
ret = bdrv_co_pwritev(extent->file->bs, write_offset, n_bytes,
ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
&local_qiov, 0);
write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
@@ -1411,7 +1407,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
if (!extent->compressed) {
ret = bdrv_co_preadv(extent->file->bs,
ret = bdrv_co_preadv(extent->file,
cluster_offset + offset_in_cluster, bytes,
qiov, 0);
if (ret < 0) {
@@ -1424,7 +1420,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
buf_bytes = cluster_bytes * 2;
cluster_buf = g_malloc(buf_bytes);
uncomp_buf = g_malloc(cluster_bytes);
ret = bdrv_pread(extent->file->bs,
ret = bdrv_pread(extent->file,
cluster_offset,
cluster_buf, buf_bytes);
if (ret < 0) {
@@ -1501,7 +1497,7 @@ vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_reset(&local_qiov);
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
ret = bdrv_co_preadv(bs->backing->bs, offset, n_bytes,
ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
&local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1649,56 +1645,11 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
return ret;
}
typedef struct VmdkWriteCompressedCo {
BlockDriverState *bs;
int64_t sector_num;
const uint8_t *buf;
int nb_sectors;
int ret;
} VmdkWriteCompressedCo;
static void vmdk_co_write_compressed(void *opaque)
static int coroutine_fn
vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov)
{
VmdkWriteCompressedCo *co = opaque;
QEMUIOVector local_qiov;
uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
struct iovec iov = (struct iovec) {
.iov_base = (uint8_t*) co->buf,
.iov_len = bytes,
};
qemu_iovec_init_external(&local_qiov, &iov, 1);
co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
}
static int vmdk_write_compressed(BlockDriverState *bs,
int64_t sector_num,
const uint8_t *buf,
int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
if (s->num_extents == 1 && s->extents[0].compressed) {
Coroutine *co;
AioContext *aio_context = bdrv_get_aio_context(bs);
VmdkWriteCompressedCo data = {
.bs = bs,
.sector_num = sector_num,
.buf = buf,
.nb_sectors = nb_sectors,
.ret = -EINPROGRESS,
};
co = qemu_coroutine_create(vmdk_co_write_compressed);
qemu_coroutine_enter(co, &data);
while (data.ret == -EINPROGRESS) {
aio_poll(aio_context, true);
}
return data.ret;
} else {
return -ENOTSUP;
}
return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
}
static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
@@ -2397,7 +2348,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_write_compressed = vmdk_write_compressed,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
.bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,

View File

@@ -237,7 +237,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
error_setg(errp, "Unable to read VHD header");
goto fail;
@@ -257,7 +257,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
/* If a fixed disk, the footer is found only at the end of the file */
ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -328,7 +328,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
if (disk_type == VHD_DYNAMIC) {
ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
HEADER_SIZE);
if (ret < 0) {
error_setg(errp, "Error reading dynamic VHD header");
@@ -385,7 +385,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
pagetable_size);
if (ret < 0) {
error_setg(errp, "Error reading pagetable");
@@ -481,7 +481,7 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
s->last_bitmap_offset = bitmap_offset;
memset(bitmap, 0xff, s->bitmap_size);
bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
}
return block_offset;
@@ -505,7 +505,7 @@ static int rewrite_footer(BlockDriverState* bs)
BDRVVPCState *s = bs->opaque;
int64_t offset = s->free_data_block_offset;
ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
if (ret < 0)
return ret;
@@ -539,7 +539,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
/* Initialize the block's bitmap */
memset(bitmap, 0xff, s->bitmap_size);
ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
s->bitmap_size);
if (ret < 0) {
return ret;
@@ -554,7 +554,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
/* Write BAT entry to disk */
bat_offset = s->bat_offset + (4 * index);
bat_value = cpu_to_be32(s->pagetable[index]);
ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
if (ret < 0)
goto fail;
@@ -591,7 +591,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
}
qemu_co_mutex_lock(&s->lock);
@@ -607,7 +607,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_reset(&local_qiov);
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes,
ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
&local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -640,7 +640,7 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0);
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
}
qemu_co_mutex_lock(&s->lock);
@@ -661,7 +661,7 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
qemu_iovec_reset(&local_qiov);
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes,
ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
&local_qiov, 0);
if (ret < 0) {
goto fail;

View File

@@ -341,9 +341,8 @@ typedef struct BDRVVVFATState {
unsigned int current_cluster;
/* write support */
BlockDriverState* write_target;
char* qcow_filename;
BlockDriverState* qcow;
BdrvChild* qcow;
void* fat2;
char* used_clusters;
array_t commits;
@@ -981,7 +980,7 @@ static int init_directories(BDRVVVFATState* s,
static BDRVVVFATState *vvv = NULL;
#endif
static int enable_write_target(BDRVVVFATState *s, Error **errp);
static int enable_write_target(BlockDriverState *bs, Error **errp);
static int is_consistent(BDRVVVFATState *s);
static QemuOptsList runtime_opts = {
@@ -1158,8 +1157,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
s->current_cluster=0xffffffff;
/* read only is the default for safety */
bs->read_only = 1;
s->qcow = s->write_target = NULL;
bs->read_only = true;
s->qcow = NULL;
s->qcow_filename = NULL;
s->fat2 = NULL;
s->downcase_short_names = 1;
@@ -1170,14 +1169,13 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
ret = enable_write_target(s, errp);
ret = enable_write_target(bs, errp);
if (ret < 0) {
goto fail;
}
bs->read_only = 0;
bs->read_only = false;
}
bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
bs->total_sectors = cyls * heads * secs;
if (init_directories(s, dirname, heads, secs, errp)) {
@@ -1209,6 +1207,11 @@ fail:
return ret;
}
static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
}
static inline void vvfat_close_current_file(BDRVVVFATState *s)
{
if(s->current_mapping) {
@@ -1387,9 +1390,10 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
return -1;
if (s->qcow) {
int n;
if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, &n)) {
DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) {
if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
(int)sector_num, n));
if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
return -1;
}
i += n - 1;
@@ -1665,12 +1669,15 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
int was_modified = 0;
int i, dummy;
if (s->qcow == NULL)
return 0;
if (s->qcow == NULL) {
return 0;
}
for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
was_modified = bdrv_is_allocated(s->qcow,
cluster2sector(s, cluster_num) + i, 1, &dummy);
for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
was_modified = bdrv_is_allocated(s->qcow->bs,
cluster2sector(s, cluster_num) + i,
1, &dummy);
}
return was_modified;
}
@@ -1819,11 +1826,16 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
vvfat_close_current_file(s);
for (i = 0; i < s->sectors_per_cluster; i++) {
if (!bdrv_is_allocated(s->qcow, offset + i, 1, &dummy)) {
if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) {
int res;
res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
if (!res) {
res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
if (res) {
return -1;
}
if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) {
res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1);
if (res) {
return -2;
}
}
@@ -2779,8 +2791,8 @@ static int do_commit(BDRVVVFATState* s)
return ret;
}
if (s->qcow->drv->bdrv_make_empty) {
s->qcow->drv->bdrv_make_empty(s->qcow);
if (s->qcow->bs->drv->bdrv_make_empty) {
s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
}
memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
@@ -2946,7 +2958,7 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
bdrv_unref(s->qcow);
bdrv_unref_child(s->bs, s->qcow);
g_free(s->qcow_filename);
}
@@ -2956,8 +2968,19 @@ static BlockDriver vvfat_write_target = {
.bdrv_close = write_target_close,
};
static int enable_write_target(BDRVVVFATState *s, Error **errp)
static void vvfat_qcow_options(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
{
*child_flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH;
}
static const BdrvChildRole child_vvfat_qcow = {
.inherit_options = vvfat_qcow_options,
};
static int enable_write_target(BlockDriverState *bs, Error **errp)
{
BDRVVVFATState *s = bs->opaque;
BlockDriver *bdrv_qcow = NULL;
BlockDriverState *backing;
QemuOpts *opts = NULL;
@@ -2995,9 +3018,10 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
}
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow"));
s->qcow = bdrv_open(s->qcow_filename, NULL, options,
BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
&child_vvfat_qcow, false, errp);
QDECREF(options);
if (!s->qcow) {
ret = -EINVAL;
goto err;
@@ -3046,6 +3070,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
.bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
.bdrv_co_preadv = vvfat_co_preadv,

View File

@@ -27,7 +27,7 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "block/aio.h"
#include "raw-aio.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
#include "qemu/iov.h"
#include <windows.h>

View File

@@ -145,7 +145,8 @@ void qmp_nbd_server_start(SocketAddress *addr,
void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs = NULL;
BlockBackend *on_eject_blk;
NBDExport *exp;
if (!nbd_server) {
@@ -158,26 +159,22 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
return;
}
blk = blk_by_name(device);
if (!blk) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
"Device '%s' not found", device);
return;
}
if (!blk_is_inserted(blk)) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
on_eject_blk = blk_by_name(device);
bs = bdrv_lookup_bs(device, device, errp);
if (!bs) {
return;
}
if (!has_writable) {
writable = false;
}
if (blk_is_read_only(blk)) {
if (bdrv_is_read_only(bs)) {
writable = false;
}
exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL,
errp);
exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
NULL, false, on_eject_blk, errp);
if (!exp) {
return;
}

File diff suppressed because it is too large Load Diff

View File

@@ -33,6 +33,7 @@
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu/coroutine.h"
#include "qemu/id.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"
@@ -60,6 +61,19 @@ BlockJob *block_job_next(BlockJob *job)
return QLIST_NEXT(job, job_list);
}
BlockJob *block_job_get(const char *id)
{
BlockJob *job;
QLIST_FOREACH(job, &block_jobs, job_list) {
if (!strcmp(id, job->id)) {
return job;
}
}
return NULL;
}
/* Normally the job runs in its BlockBackend's AioContext. The exception is
* block_job_defer_to_main_loop() where it runs in the QEMU main loop. Code
* that supports both cases uses this helper function.
@@ -103,9 +117,9 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
int64_t speed, BlockCompletionFunc *cb,
void *opaque, Error **errp)
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, int64_t speed,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
BlockBackend *blk;
BlockJob *job;
@@ -116,6 +130,24 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
return NULL;
}
if (job_id == NULL) {
job_id = bdrv_get_device_name(bs);
if (!*job_id) {
error_setg(errp, "An explicit job ID is required for this node");
return NULL;
}
}
if (!id_wellformed(job_id)) {
error_setg(errp, "Invalid job ID '%s'", job_id);
return NULL;
}
if (block_job_get(job_id)) {
error_setg(errp, "Job ID '%s' already in use", job_id);
return NULL;
}
blk = blk_new();
blk_insert_bs(blk, bs);
@@ -126,7 +158,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
job->id = g_strdup(bdrv_get_device_name(bs));
job->id = g_strdup(job_id);
job->blk = blk;
job->cb = cb;
job->opaque = opaque;
@@ -290,7 +322,8 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
void block_job_complete(BlockJob *job, Error **errp)
{
if (job->pause_count || job->cancelled || !job->driver->complete) {
error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
error_setg(errp, "The active block job '%s' cannot be completed",
job->id);
return;
}
@@ -346,7 +379,7 @@ void block_job_resume(BlockJob *job)
void block_job_enter(BlockJob *job)
{
if (job->co && !job->busy) {
qemu_coroutine_enter(job->co, NULL);
qemu_coroutine_enter(job->co);
}
}
@@ -524,6 +557,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
case BLOCKDEV_ON_ERROR_AUTO:
action = (error == ENOSPC) ?
BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
break;

View File

@@ -162,4 +162,4 @@ struct target_vm86plus_struct {
#define UNAME_MACHINE "i386"
#endif /* TARGET_SYSCALL_H */
#endif /* TARGET_SYSCALL_H */

View File

@@ -17,10 +17,12 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu-version.h"
#include <machine/trap.h>
#include "qapi/error.h"
#include "qemu.h"
#include "qemu/config-file.h"
#include "qemu/path.h"
#include "qemu/help_option.h"
/* For tb_lock */
@@ -30,6 +32,8 @@
#include "qemu/timer.h"
#include "qemu/envlist.h"
#include "exec/log.h"
#include "trace/control.h"
#include "glib-compat.h"
int singlestep;
unsigned long mmap_min_addr;
@@ -168,7 +172,7 @@ void cpu_loop(CPUX86State *env)
//target_siginfo_t info;
for(;;) {
trapnr = cpu_x86_exec(cs);
trapnr = cpu_exec(cs);
switch(trapnr) {
case 0x80:
/* syscall from int $0x80 */
@@ -509,7 +513,7 @@ void cpu_loop(CPUSPARCState *env)
//target_siginfo_t info;
while (1) {
trapnr = cpu_sparc_exec(cs);
trapnr = cpu_exec(cs);
switch (trapnr) {
#ifndef TARGET_SPARC64
@@ -664,7 +668,8 @@ void cpu_loop(CPUSPARCState *env)
static void usage(void)
{
printf("qemu-" TARGET_NAME " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION
", " QEMU_COPYRIGHT "\n"
"usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
"BSD CPU emulator (compiled for %s emulation)\n"
"\n"
@@ -687,6 +692,8 @@ static void usage(void)
"-p pagesize set the host page size to 'pagesize'\n"
"-singlestep always run in singlestep mode\n"
"-strace log system calls\n"
"-trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
" specify tracing options\n"
"\n"
"Environment variables:\n"
"QEMU_STRACE Print system calls and arguments similar to the\n"
@@ -735,6 +742,7 @@ int main(int argc, char **argv)
int gdbstub_port = 0;
char **target_environ, **wrk;
envlist_t *envlist = NULL;
char *trace_file = NULL;
bsd_type = target_openbsd;
if (argc <= 1)
@@ -754,8 +762,10 @@ int main(int argc, char **argv)
cpu_model = NULL;
qemu_add_opts(&qemu_trace_opts);
optind = 1;
for(;;) {
for (;;) {
if (optind >= argc)
break;
r = argv[optind];
@@ -840,8 +850,10 @@ int main(int argc, char **argv)
singlestep = 1;
} else if (!strcmp(r, "strace")) {
do_strace = 1;
} else
{
} else if (!strcmp(r, "trace")) {
g_free(trace_file);
trace_file = trace_opt_parse(optarg);
} else {
usage();
}
}
@@ -865,6 +877,11 @@ int main(int argc, char **argv)
}
filename = argv[optind];
if (!trace_init_backends()) {
exit(1);
}
trace_init_file(trace_file);
/* Zero out regs */
memset(regs, 0, sizeof(struct target_pt_regs));
@@ -1116,6 +1133,7 @@ int main(int argc, char **argv)
gdbserver_start (gdbstub_port);
gdb_handlesig(cpu, 0);
}
trace_init_vcpu_events();
cpu_loop(env);
/* never exits */
return 0;

View File

@@ -209,8 +209,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
abi_ulong new_addr);
int target_msync(abi_ulong start, abi_ulong len, int flags);
extern unsigned long last_brk;
void cpu_list_lock(void);
void cpu_list_unlock(void);
#if defined(CONFIG_USE_NPTL)
void mmap_fork_start(void);
void mmap_fork_end(int child);
@@ -358,7 +356,7 @@ static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy
#ifdef DEBUG_REMAP
{
void *addr;
addr = malloc(len);
addr = g_malloc(len);
if (copy)
memcpy(addr, g2h(guest_addr), len);
else
@@ -384,7 +382,7 @@ static inline void unlock_user(void *host_ptr, abi_ulong guest_addr,
return;
if (len > 0)
memcpy(g2h(guest_addr), host_ptr, len);
free(host_ptr);
g_free(host_ptr);
#endif
}

View File

@@ -11,4 +11,4 @@ struct target_pt_regs {
#define UNAME_MACHINE "sun4"
#endif /* TARGET_SYSCALL_H */
#endif /* TARGET_SYSCALL_H */

View File

@@ -12,4 +12,4 @@ struct target_pt_regs {
#define UNAME_MACHINE "sun4u"
#endif /* TARGET_SYSCALL_H */
#endif /* TARGET_SYSCALL_H */

View File

@@ -118,4 +118,4 @@ struct target_msqid64_ds {
#define TARGET_ARCH_GET_FS 0x1003
#define TARGET_ARCH_GET_GS 0x1004
#endif /* TARGET_SYSCALL_H */
#endif /* TARGET_SYSCALL_H */

96
configure vendored
View File

@@ -229,6 +229,7 @@ xfs=""
vhost_net="no"
vhost_scsi="no"
vhost_vsock="no"
kvm="no"
rdma=""
gprof="no"
@@ -305,8 +306,8 @@ archipelago="no"
gtk=""
gtkabi=""
gtk_gl="no"
tls_priority="NORMAL"
gnutls=""
gnutls_hash=""
gnutls_rnd=""
nettle=""
nettle_kdf="no"
@@ -369,6 +370,7 @@ fi
ar="${AR-${cross_prefix}ar}"
as="${AS-${cross_prefix}as}"
ccas="${CCAS-$cc}"
cpp="${CPP-$cc -E}"
objcopy="${OBJCOPY-${cross_prefix}objcopy}"
ld="${LD-${cross_prefix}ld}"
@@ -673,6 +675,7 @@ Haiku)
kvm="yes"
vhost_net="yes"
vhost_scsi="yes"
vhost_vsock="yes"
QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES"
;;
esac
@@ -1016,6 +1019,10 @@ for opt do
;;
--enable-vhost-scsi) vhost_scsi="yes"
;;
--disable-vhost-vsock) vhost_vsock="no"
;;
--enable-vhost-vsock) vhost_vsock="yes"
;;
--disable-opengl) opengl="no"
;;
--enable-opengl) opengl="yes"
@@ -1097,6 +1104,8 @@ for opt do
;;
--enable-gtk) gtk="yes"
;;
--tls-priority=*) tls_priority="$optarg"
;;
--disable-gnutls) gnutls="no"
;;
--enable-gnutls) gnutls="yes"
@@ -1308,6 +1317,7 @@ Advanced options (experts only):
--disable-blobs disable installing provided firmware blobs
--with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent
--with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb)
--tls-priority default TLS protocol/cipher priority string
Optional features, enabled with --enable-FEATURE and
disabled with --disable-FEATURE, default is enabled if available:
@@ -1448,7 +1458,7 @@ fi
gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
gcc_flags="-Wendif-labels $gcc_flags"
gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
gcc_flags="-Wno-initializer-overrides $gcc_flags"
gcc_flags="-Wno-string-plus-int $gcc_flags"
# Note that we do not add -Werror to gcc_flags here, because that would
@@ -1784,7 +1794,9 @@ fi
##########################################
# avx2 optimization requirement check
cat > $TMPC << EOF
if test "$static" = "no" ; then
cat > $TMPC << EOF
#pragma GCC push_options
#pragma GCC target("avx2")
#include <cpuid.h>
@@ -1797,12 +1809,13 @@ static void *bar_ifunc(void) {return (void*) bar;}
int foo(void *a) __attribute__((ifunc("bar_ifunc")));
int main(int argc, char *argv[]) { return foo(argv[0]);}
EOF
if compile_object "" ; then
if has readelf; then
if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
avx2_opt="yes"
fi
fi
if compile_object "" ; then
if has readelf; then
if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
avx2_opt="yes"
fi
fi
fi
fi
#########################################
@@ -2218,13 +2231,6 @@ if test "$gnutls" != "no"; then
QEMU_CFLAGS="$QEMU_CFLAGS $gnutls_cflags"
gnutls="yes"
# gnutls_hash_init requires >= 2.9.10
if $pkg_config --exists "gnutls >= 2.9.10"; then
gnutls_hash="yes"
else
gnutls_hash="no"
fi
# gnutls_rnd requires >= 2.11.0
if $pkg_config --exists "gnutls >= 2.11.0"; then
gnutls_rnd="yes"
@@ -2258,11 +2264,9 @@ if test "$gnutls" != "no"; then
feature_not_found "gnutls" "Install gnutls devel"
else
gnutls="no"
gnutls_hash="no"
gnutls_rnd="no"
fi
else
gnutls_hash="no"
gnutls_rnd="no"
fi
@@ -3126,6 +3130,7 @@ else
if test "$found" = "no"; then
LIBS="$pthread_lib $LIBS"
fi
PTHREAD_LIB="$pthread_lib"
break
fi
done
@@ -4055,13 +4060,13 @@ fi
if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$vss_win32_sdk" != "no" ; then
case "$vss_win32_sdk" in
"") vss_win32_include="-I$source_path" ;;
"") vss_win32_include="-isystem $source_path" ;;
*\ *) # The SDK is installed in "Program Files" by default, but we cannot
# handle path with spaces. So we symlink the headers into ".sdk/vss".
vss_win32_include="-I$source_path/.sdk/vss"
vss_win32_include="-isystem $source_path/.sdk/vss"
symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc"
;;
*) vss_win32_include="-I$vss_win32_sdk"
*) vss_win32_include="-isystem $vss_win32_sdk"
esac
cat > $TMPC << EOF
#define __MIDL_user_allocate_free_DEFINED__
@@ -4192,6 +4197,18 @@ if compile_prog "" "" ; then
posix_madvise=yes
fi
##########################################
# check if we have posix_syslog
posix_syslog=no
cat > $TMPC << EOF
#include <syslog.h>
int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; }
EOF
if compile_prog "" "" ; then
posix_syslog=yes
fi
##########################################
# check if trace backend exists
@@ -4700,7 +4717,16 @@ roms=
if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) -a \
"$targetos" != "Darwin" -a "$targetos" != "SunOS" -a \
"$softmmu" = yes ; then
roms="optionrom"
# Different host OS linkers have different ideas about the name of the ELF
# emulation. Linux and OpenBSD use 'elf_i386'; FreeBSD uses the _fbsd
# variant; and Windows uses i386pe.
for emu in elf_i386 elf_i386_fbsd i386pe; do
if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then
ld_i386_emulation="$emu"
roms="optionrom"
break
fi
done
fi
if test "$cpu" = "ppc64" -a "$targetos" != "Darwin" ; then
roms="$roms spapr-rtas"
@@ -4812,8 +4838,8 @@ echo "SDL support $sdl $(echo_version $sdl $sdlversion)"
echo "GTK support $gtk $(echo_version $gtk $gtk_version)"
echo "GTK GL support $gtk_gl"
echo "VTE support $vte $(echo_version $vte $vteversion)"
echo "TLS priority $tls_priority"
echo "GNUTLS support $gnutls"
echo "GNUTLS hash $gnutls_hash"
echo "GNUTLS rnd $gnutls_rnd"
echo "libgcrypt $gcrypt"
echo "libgcrypt kdf $gcrypt_kdf"
@@ -4863,6 +4889,7 @@ echo "uuid support $uuid"
echo "libcap-ng support $cap_ng"
echo "vhost-net support $vhost_net"
echo "vhost-scsi support $vhost_scsi"
echo "vhost-vsock support $vhost_vsock"
echo "Trace backends $trace_backends"
if have_backend "simple"; then
echo "Trace output file $trace_file-<pid>"
@@ -5176,12 +5203,10 @@ if test "$gtk" = "yes" ; then
echo "CONFIG_GTK_GL=y" >> $config_host_mak
fi
fi
echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak
if test "$gnutls" = "yes" ; then
echo "CONFIG_GNUTLS=y" >> $config_host_mak
fi
if test "$gnutls_hash" = "yes" ; then
echo "CONFIG_GNUTLS_HASH=y" >> $config_host_mak
fi
if test "$gnutls_rnd" = "yes" ; then
echo "CONFIG_GNUTLS_RND=y" >> $config_host_mak
fi
@@ -5246,6 +5271,9 @@ fi
if test "$vhost_net" = "yes" ; then
echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak
fi
if test "$vhost_vsock" = "yes" ; then
echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak
fi
if test "$blobs" = "yes" ; then
echo "INSTALL_BLOBS=yes" >> $config_host_mak
fi
@@ -5462,6 +5490,13 @@ if have_backend "ftrace"; then
feature_not_found "ftrace(trace backend)" "ftrace requires Linux"
fi
fi
if have_backend "syslog"; then
if test "$posix_syslog" = "yes" ; then
echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak
else
feature_not_found "syslog(trace backend)" "syslog not available"
fi
fi
echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
if test "$rdma" = "yes" ; then
@@ -5517,6 +5552,7 @@ echo "OBJCC=$objcc" >> $config_host_mak
echo "AR=$ar" >> $config_host_mak
echo "ARFLAGS=$ARFLAGS" >> $config_host_mak
echo "AS=$as" >> $config_host_mak
echo "CCAS=$ccas" >> $config_host_mak
echo "CPP=$cpp" >> $config_host_mak
echo "OBJCOPY=$objcopy" >> $config_host_mak
echo "LD=$ld" >> $config_host_mak
@@ -5541,8 +5577,10 @@ fi
echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
echo "LD_REL_FLAGS=$LD_REL_FLAGS" >> $config_host_mak
echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
echo "LIBS+=$LIBS" >> $config_host_mak
echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
echo "PTHREAD_LIB=$PTHREAD_LIB" >> $config_host_mak
echo "EXESUF=$EXESUF" >> $config_host_mak
echo "DSOSUF=$DSOSUF" >> $config_host_mak
echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
@@ -5990,6 +6028,7 @@ for rom in seabios vgabios ; do
echo "# Automatically generated by configure - do not modify" > $config_mak
echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak
echo "AS=$as" >> $config_mak
echo "CCAS=$ccas" >> $config_mak
echo "CC=$cc" >> $config_mak
echo "BCC=bcc" >> $config_mak
echo "CPP=$cpp" >> $config_mak
@@ -5998,6 +6037,11 @@ for rom in seabios vgabios ; do
echo "LD=$ld" >> $config_mak
done
# set up tests data directory
if [ ! -e tests/data ]; then
symlink "$source_path/tests/data" tests/data
fi
# set up qemu-iotests in this build directory
iotests_common_env="tests/qemu-iotests/common.env"
iotests_check="tests/qemu-iotests/check"

View File

@@ -6,8 +6,8 @@
* top-level directory.
*/
#ifndef _IVSHMEM_CLIENT_H_
#define _IVSHMEM_CLIENT_H_
#ifndef IVSHMEM_CLIENT_H
#define IVSHMEM_CLIENT_H
/**
* This file provides helper to implement an ivshmem client. It is used
@@ -209,4 +209,4 @@ ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
*/
void ivshmem_client_dump(const IvshmemClient *client);
#endif /* _IVSHMEM_CLIENT_H_ */
#endif /* IVSHMEM_CLIENT_H */

View File

@@ -6,8 +6,8 @@
* top-level directory.
*/
#ifndef _IVSHMEM_SERVER_H_
#define _IVSHMEM_SERVER_H_
#ifndef IVSHMEM_SERVER_H
#define IVSHMEM_SERVER_H
/**
* The ivshmem server is a daemon that creates a unix socket in listen
@@ -163,4 +163,4 @@ ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
*/
void ivshmem_server_dump(const IvshmemServer *server);
#endif /* _IVSHMEM_SERVER_H_ */
#endif /* IVSHMEM_SERVER_H */

View File

@@ -608,17 +608,16 @@ int cpu_exec(CPUState *cpu)
init_delay_params(&sc, cpu);
for(;;) {
TranslationBlock *tb, *last_tb;
int tb_exit = 0;
/* prepare setjmp context for exception handling */
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
TranslationBlock *tb, *last_tb = NULL;
int tb_exit = 0;
/* if an exception is pending, we execute it here */
if (cpu_handle_exception(cpu, &ret)) {
break;
}
last_tb = NULL; /* forget the last executed TB after exception */
cpu->tb_flushed = false; /* reset before first TB lookup */
for(;;) {
cpu_handle_interrupt(cpu, &last_tb);

View File

@@ -498,6 +498,35 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
return qemu_ram_addr_from_host_nofail(p);
}
/* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
size_t elt_ofs, target_ulong page)
{
size_t vidx;
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
tmpio = *io; *io = *vio; *vio = tmpio;
return true;
}
}
return false;
}
/* Macro to call the above, with local variables from the use context. */
#define VICTIM_TLB_HIT(TY, ADDR) \
victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
(ADDR) & TARGET_PAGE_MASK)
#define MMUSUFFIX _mmu
#define SHIFT 0

View File

@@ -1,5 +1,8 @@
crypto-obj-y = init.o
crypto-obj-y += hash.o
crypto-obj-$(CONFIG_NETTLE) += hash-nettle.o
crypto-obj-$(if $(CONFIG_NETTLE),n,$(CONFIG_GCRYPT)) += hash-gcrypt.o
crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT),n,y)) += hash-glib.o
crypto-obj-y += aes.o
crypto-obj-y += desrfb.o
crypto-obj-y += cipher.o
@@ -10,6 +13,7 @@ crypto-obj-y += tlssession.o
crypto-obj-y += secret.o
crypto-obj-$(CONFIG_GCRYPT) += random-gcrypt.o
crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS_RND)) += random-gnutls.o
crypto-obj-$(if $(CONFIG_GCRYPT),n,$(if $(CONFIG_GNUTLS_RND),n,y)) += random-platform.o
crypto-obj-y += pbkdf.o
crypto-obj-$(CONFIG_NETTLE_KDF) += pbkdf-nettle.o
crypto-obj-$(if $(CONFIG_NETTLE_KDF),n,$(CONFIG_GCRYPT_KDF)) += pbkdf-gcrypt.o
@@ -26,5 +30,4 @@ crypto-obj-y += block-luks.o
# Let the userspace emulators avoid linking gnutls/etc
crypto-aes-obj-y = aes.o
stub-obj-y += random-stub.o
stub-obj-y += pbkdf-stub.o

View File

@@ -201,6 +201,15 @@ QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592);
struct QCryptoBlockLUKS {
QCryptoBlockLUKSHeader header;
/* Cache parsed versions of what's in header fields,
* as we can't rely on QCryptoBlock.cipher being
* non-NULL */
QCryptoCipherAlgorithm cipher_alg;
QCryptoCipherMode cipher_mode;
QCryptoIVGenAlgorithm ivgen_alg;
QCryptoHashAlgorithm ivgen_hash_alg;
QCryptoHashAlgorithm hash_alg;
};
@@ -776,6 +785,11 @@ qcrypto_block_luks_open(QCryptoBlock *block,
}
if (ivalg == QCRYPTO_IVGEN_ALG_ESSIV) {
if (!ivhash_name) {
ret = -EINVAL;
error_setg(errp, "Missing IV generator hash specification");
goto fail;
}
ivcipheralg = qcrypto_block_luks_essiv_cipher(cipheralg,
ivhash,
&local_err);
@@ -785,6 +799,13 @@ qcrypto_block_luks_open(QCryptoBlock *block,
goto fail;
}
} else {
/* Note we parsed the ivhash_name earlier in the cipher_mode
* spec string even with plain/plain64 ivgens, but we
* will ignore it, since it is irrelevant for these ivgens.
* This is for compat with dm-crypt which will silently
* ignore hash names with these ivgens rather than report
* an error about the invalid usage
*/
ivcipheralg = cipheralg;
}
@@ -835,6 +856,12 @@ qcrypto_block_luks_open(QCryptoBlock *block,
block->payload_offset = luks->header.payload_offset *
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
luks->cipher_alg = cipheralg;
luks->cipher_mode = ciphermode;
luks->ivgen_alg = ivalg;
luks->ivgen_hash_alg = ivhash;
luks->hash_alg = hash;
g_free(masterkey);
g_free(password);
@@ -904,6 +931,15 @@ qcrypto_block_luks_create(QCryptoBlock *block,
if (!luks_opts.has_hash_alg) {
luks_opts.hash_alg = QCRYPTO_HASH_ALG_SHA256;
}
if (luks_opts.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
if (!luks_opts.has_ivgen_hash_alg) {
luks_opts.ivgen_hash_alg = QCRYPTO_HASH_ALG_SHA256;
luks_opts.has_ivgen_hash_alg = true;
}
}
/* Note we're allowing ivgen_hash_alg to be set even for
* non-essiv iv generators that don't need a hash. It will
* be silently ignored, for compatibility with dm-crypt */
if (!options->u.luks.key_secret) {
error_setg(errp, "Parameter 'key-secret' is required for cipher");
@@ -1250,6 +1286,12 @@ qcrypto_block_luks_create(QCryptoBlock *block,
goto error;
}
luks->cipher_alg = luks_opts.cipher_alg;
luks->cipher_mode = luks_opts.cipher_mode;
luks->ivgen_alg = luks_opts.ivgen_alg;
luks->ivgen_hash_alg = luks_opts.ivgen_hash_alg;
luks->hash_alg = luks_opts.hash_alg;
memset(masterkey, 0, luks->header.key_bytes);
g_free(masterkey);
memset(slotkey, 0, luks->header.key_bytes);
@@ -1284,6 +1326,51 @@ qcrypto_block_luks_create(QCryptoBlock *block,
}
static int qcrypto_block_luks_get_info(QCryptoBlock *block,
QCryptoBlockInfo *info,
Error **errp)
{
QCryptoBlockLUKS *luks = block->opaque;
QCryptoBlockInfoLUKSSlot *slot;
QCryptoBlockInfoLUKSSlotList *slots = NULL, **prev = &info->u.luks.slots;
size_t i;
info->u.luks.cipher_alg = luks->cipher_alg;
info->u.luks.cipher_mode = luks->cipher_mode;
info->u.luks.ivgen_alg = luks->ivgen_alg;
if (info->u.luks.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
info->u.luks.has_ivgen_hash_alg = true;
info->u.luks.ivgen_hash_alg = luks->ivgen_hash_alg;
}
info->u.luks.hash_alg = luks->hash_alg;
info->u.luks.payload_offset = block->payload_offset;
info->u.luks.master_key_iters = luks->header.master_key_iterations;
info->u.luks.uuid = g_strndup((const char *)luks->header.uuid,
sizeof(luks->header.uuid));
for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
slots = g_new0(QCryptoBlockInfoLUKSSlotList, 1);
*prev = slots;
slots->value = slot = g_new0(QCryptoBlockInfoLUKSSlot, 1);
slot->active = luks->header.key_slots[i].active ==
QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED;
slot->key_offset = luks->header.key_slots[i].key_offset
* QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
if (slot->active) {
slot->has_iters = true;
slot->iters = luks->header.key_slots[i].iterations;
slot->has_stripes = true;
slot->stripes = luks->header.key_slots[i].stripes;
}
prev = &slots->next;
}
return 0;
}
static void qcrypto_block_luks_cleanup(QCryptoBlock *block)
{
g_free(block->opaque);
@@ -1321,6 +1408,7 @@ qcrypto_block_luks_encrypt(QCryptoBlock *block,
const QCryptoBlockDriver qcrypto_block_driver_luks = {
.open = qcrypto_block_luks_open,
.create = qcrypto_block_luks_create,
.get_info = qcrypto_block_luks_get_info,
.cleanup = qcrypto_block_luks_cleanup,
.decrypt = qcrypto_block_luks_decrypt,
.encrypt = qcrypto_block_luks_encrypt,

View File

@@ -18,11 +18,11 @@
*
*/
#ifndef QCRYPTO_BLOCK_LUKS_H__
#define QCRYPTO_BLOCK_LUKS_H__
#ifndef QCRYPTO_BLOCK_LUKS_H
#define QCRYPTO_BLOCK_LUKS_H
#include "crypto/blockpriv.h"
extern const QCryptoBlockDriver qcrypto_block_driver_luks;
#endif /* QCRYPTO_BLOCK_LUKS_H__ */
#endif /* QCRYPTO_BLOCK_LUKS_H */

View File

@@ -18,11 +18,11 @@
*
*/
#ifndef QCRYPTO_BLOCK_QCOW_H__
#define QCRYPTO_BLOCK_QCOW_H__
#ifndef QCRYPTO_BLOCK_QCOW_H
#define QCRYPTO_BLOCK_QCOW_H
#include "crypto/blockpriv.h"
extern const QCryptoBlockDriver qcrypto_block_driver_qcow;
#endif /* QCRYPTO_BLOCK_QCOW_H__ */
#endif /* QCRYPTO_BLOCK_QCOW_H */

View File

@@ -59,7 +59,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
!qcrypto_block_drivers[options->format]) {
error_setg(errp, "Unsupported block driver %d", options->format);
error_setg(errp, "Unsupported block driver %s",
QCryptoBlockFormat_lookup[options->format]);
g_free(block);
return NULL;
}
@@ -88,7 +89,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
!qcrypto_block_drivers[options->format]) {
error_setg(errp, "Unsupported block driver %d", options->format);
error_setg(errp, "Unsupported block driver %s",
QCryptoBlockFormat_lookup[options->format]);
g_free(block);
return NULL;
}
@@ -105,6 +107,23 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
}
QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
Error **errp)
{
QCryptoBlockInfo *info = g_new0(QCryptoBlockInfo, 1);
info->format = block->format;
if (block->driver->get_info &&
block->driver->get_info(block, info, errp) < 0) {
g_free(info);
return NULL;
}
return info;
}
int qcrypto_block_decrypt(QCryptoBlock *block,
uint64_t startsector,
uint8_t *buf,

View File

@@ -18,8 +18,8 @@
*
*/
#ifndef QCRYPTO_BLOCK_PRIV_H__
#define QCRYPTO_BLOCK_PRIV_H__
#ifndef QCRYPTO_BLOCKPRIV_H
#define QCRYPTO_BLOCKPRIV_H
#include "crypto/block.h"
@@ -53,6 +53,10 @@ struct QCryptoBlockDriver {
void *opaque,
Error **errp);
int (*get_info)(QCryptoBlock *block,
QCryptoBlockInfo *info,
Error **errp);
void (*cleanup)(QCryptoBlock *block);
int (*encrypt)(QCryptoBlock *block,
@@ -89,4 +93,4 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
size_t len,
Error **errp);
#endif /* QCRYPTO_BLOCK_PRIV_H__ */
#endif /* QCRYPTO_BLOCKPRIV_H */

View File

@@ -244,7 +244,8 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC &&
cipher->mode != QCRYPTO_CIPHER_MODE_ECB &&
cipher->mode != QCRYPTO_CIPHER_MODE_XTS) {
error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
@@ -376,7 +377,8 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
QCryptoCipherBuiltin *ctxt;
if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
@@ -442,7 +444,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp,
"Unsupported cipher algorithm %d", cipher->alg);
"Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_lookup[cipher->alg]);
goto error;
}

View File

@@ -70,7 +70,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
gcrymode = GCRY_CIPHER_MODE_CBC;
break;
default:
error_setg(errp, "Unsupported cipher mode %d", mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -120,7 +121,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d", alg);
error_setg(errp, "Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_lookup[alg]);
return NULL;
}
@@ -192,6 +194,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
}
if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
if (ctx->blocksize != XTS_BLOCK_SIZE) {
error_setg(errp,
"Cipher block size %zu must equal XTS block size %d",
ctx->blocksize, XTS_BLOCK_SIZE);
goto error;
}
ctx->iv = g_new0(uint8_t, ctx->blocksize);
}

View File

@@ -227,7 +227,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
case QCRYPTO_CIPHER_MODE_XTS:
break;
default:
error_setg(errp, "Unsupported cipher mode %d", mode);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -357,7 +358,15 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d", alg);
error_setg(errp, "Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_lookup[alg]);
goto error;
}
if (mode == QCRYPTO_CIPHER_MODE_XTS &&
ctx->blocksize != XTS_BLOCK_SIZE) {
error_setg(errp, "Cipher block size %zu must equal XTS block size %d",
ctx->blocksize, XTS_BLOCK_SIZE);
goto error;
}
@@ -422,8 +431,8 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d",
cipher->alg);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
return 0;
@@ -456,19 +465,14 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
break;
case QCRYPTO_CIPHER_MODE_XTS:
if (ctx->blocksize != XTS_BLOCK_SIZE) {
error_setg(errp, "Block size must be %d not %zu",
XTS_BLOCK_SIZE, ctx->blocksize);
return -1;
}
xts_decrypt(ctx->ctx, ctx->ctx_tweak,
ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper,
ctx->iv, len, out, in);
break;
default:
error_setg(errp, "Unsupported cipher algorithm %d",
cipher->alg);
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
return 0;

109
crypto/hash-gcrypt.c Normal file
View File

@@ -0,0 +1,109 @@
/*
* QEMU Crypto hash algorithms
*
* Copyright (c) 2016 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include <gcrypt.h>
#include "qapi/error.h"
#include "crypto/hash.h"
static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
[QCRYPTO_HASH_ALG_MD5] = GCRY_MD_MD5,
[QCRYPTO_HASH_ALG_SHA1] = GCRY_MD_SHA1,
[QCRYPTO_HASH_ALG_SHA224] = GCRY_MD_SHA224,
[QCRYPTO_HASH_ALG_SHA256] = GCRY_MD_SHA256,
[QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384,
[QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512,
[QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160,
};
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
{
if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
qcrypto_hash_alg_map[alg] != GCRY_MD_NONE) {
return true;
}
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov,
size_t niov,
uint8_t **result,
size_t *resultlen,
Error **errp)
{
int i, ret;
gcry_md_hd_t md;
unsigned char *digest;
if (!qcrypto_hash_supports(alg)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);
return -1;
}
ret = gcry_md_open(&md, qcrypto_hash_alg_map[alg], 0);
if (ret < 0) {
error_setg(errp,
"Unable to initialize hash algorithm: %s",
gcry_strerror(ret));
return -1;
}
for (i = 0; i < niov; i++) {
gcry_md_write(md, iov[i].iov_base, iov[i].iov_len);
}
ret = gcry_md_get_algo_dlen(qcrypto_hash_alg_map[alg]);
if (ret <= 0) {
error_setg(errp,
"Unable to get hash length: %s",
gcry_strerror(ret));
goto error;
}
if (*resultlen == 0) {
*resultlen = ret;
*result = g_new0(uint8_t, *resultlen);
} else if (*resultlen != ret) {
error_setg(errp,
"Result buffer size %zu is smaller than hash %d",
*resultlen, ret);
goto error;
}
digest = gcry_md_read(md, 0);
if (!digest) {
error_setg(errp,
"No digest produced");
goto error;
}
memcpy(*result, digest, *resultlen);
gcry_md_close(md);
return 0;
error:
gcry_md_close(md);
return -1;
}

97
crypto/hash-glib.c Normal file
View File

@@ -0,0 +1,97 @@
/*
* QEMU Crypto hash algorithms
*
* Copyright (c) 2016 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "crypto/hash.h"
static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
[QCRYPTO_HASH_ALG_MD5] = G_CHECKSUM_MD5,
[QCRYPTO_HASH_ALG_SHA1] = G_CHECKSUM_SHA1,
[QCRYPTO_HASH_ALG_SHA224] = -1,
[QCRYPTO_HASH_ALG_SHA256] = G_CHECKSUM_SHA256,
[QCRYPTO_HASH_ALG_SHA384] = -1,
#if GLIB_CHECK_VERSION(2, 36, 0)
[QCRYPTO_HASH_ALG_SHA512] = G_CHECKSUM_SHA512,
#else
[QCRYPTO_HASH_ALG_SHA512] = -1,
#endif
[QCRYPTO_HASH_ALG_RIPEMD160] = -1,
};
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
{
if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
qcrypto_hash_alg_map[alg] != -1) {
return true;
}
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov,
size_t niov,
uint8_t **result,
size_t *resultlen,
Error **errp)
{
int i, ret;
GChecksum *cs;
if (!qcrypto_hash_supports(alg)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);
return -1;
}
cs = g_checksum_new(qcrypto_hash_alg_map[alg]);
for (i = 0; i < niov; i++) {
g_checksum_update(cs, iov[i].iov_base, iov[i].iov_len);
}
ret = g_checksum_type_get_length(qcrypto_hash_alg_map[alg]);
if (ret < 0) {
error_setg(errp, "%s",
"Unable to get hash length");
goto error;
}
if (*resultlen == 0) {
*resultlen = ret;
*result = g_new0(uint8_t, *resultlen);
} else if (*resultlen != ret) {
error_setg(errp,
"Result buffer size %zu is smaller than hash %d",
*resultlen, ret);
goto error;
}
g_checksum_get_digest(cs, *result, resultlen);
g_checksum_free(cs);
return 0;
error:
g_checksum_free(cs);
return -1;
}

154
crypto/hash-nettle.c Normal file
View File

@@ -0,0 +1,154 @@
/*
* QEMU Crypto hash algorithms
*
* Copyright (c) 2016 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "crypto/hash.h"
#include <nettle/md5.h>
#include <nettle/sha.h>
#include <nettle/ripemd160.h>
typedef void (*qcrypto_nettle_init)(void *ctx);
typedef void (*qcrypto_nettle_write)(void *ctx,
unsigned int len,
const uint8_t *buf);
typedef void (*qcrypto_nettle_result)(void *ctx,
unsigned int len,
uint8_t *buf);
union qcrypto_hash_ctx {
struct md5_ctx md5;
struct sha1_ctx sha1;
struct sha224_ctx sha224;
struct sha256_ctx sha256;
struct sha384_ctx sha384;
struct sha512_ctx sha512;
struct ripemd160_ctx ripemd160;
};
struct qcrypto_hash_alg {
qcrypto_nettle_init init;
qcrypto_nettle_write write;
qcrypto_nettle_result result;
size_t len;
} qcrypto_hash_alg_map[] = {
[QCRYPTO_HASH_ALG_MD5] = {
.init = (qcrypto_nettle_init)md5_init,
.write = (qcrypto_nettle_write)md5_update,
.result = (qcrypto_nettle_result)md5_digest,
.len = MD5_DIGEST_SIZE,
},
[QCRYPTO_HASH_ALG_SHA1] = {
.init = (qcrypto_nettle_init)sha1_init,
.write = (qcrypto_nettle_write)sha1_update,
.result = (qcrypto_nettle_result)sha1_digest,
.len = SHA1_DIGEST_SIZE,
},
[QCRYPTO_HASH_ALG_SHA224] = {
.init = (qcrypto_nettle_init)sha224_init,
.write = (qcrypto_nettle_write)sha224_update,
.result = (qcrypto_nettle_result)sha224_digest,
.len = SHA224_DIGEST_SIZE,
},
[QCRYPTO_HASH_ALG_SHA256] = {
.init = (qcrypto_nettle_init)sha256_init,
.write = (qcrypto_nettle_write)sha256_update,
.result = (qcrypto_nettle_result)sha256_digest,
.len = SHA256_DIGEST_SIZE,
},
[QCRYPTO_HASH_ALG_SHA384] = {
.init = (qcrypto_nettle_init)sha384_init,
.write = (qcrypto_nettle_write)sha384_update,
.result = (qcrypto_nettle_result)sha384_digest,
.len = SHA384_DIGEST_SIZE,
},
[QCRYPTO_HASH_ALG_SHA512] = {
.init = (qcrypto_nettle_init)sha512_init,
.write = (qcrypto_nettle_write)sha512_update,
.result = (qcrypto_nettle_result)sha512_digest,
.len = SHA512_DIGEST_SIZE,
},
[QCRYPTO_HASH_ALG_RIPEMD160] = {
.init = (qcrypto_nettle_init)ripemd160_init,
.write = (qcrypto_nettle_write)ripemd160_update,
.result = (qcrypto_nettle_result)ripemd160_digest,
.len = RIPEMD160_DIGEST_SIZE,
},
};
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
{
if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
qcrypto_hash_alg_map[alg].init != NULL) {
return true;
}
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov,
size_t niov,
uint8_t **result,
size_t *resultlen,
Error **errp)
{
int i;
union qcrypto_hash_ctx ctx;
if (!qcrypto_hash_supports(alg)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);
return -1;
}
qcrypto_hash_alg_map[alg].init(&ctx);
for (i = 0; i < niov; i++) {
/* Some versions of nettle have functions
* declared with 'int' instead of 'size_t'
* so to be safe avoid writing more than
* UINT_MAX bytes at a time
*/
size_t len = iov[i].iov_len;
uint8_t *base = iov[i].iov_base;
while (len) {
size_t shortlen = MIN(len, UINT_MAX);
qcrypto_hash_alg_map[alg].write(&ctx, len, base);
len -= shortlen;
base += len;
}
}
if (*resultlen == 0) {
*resultlen = qcrypto_hash_alg_map[alg].len;
*result = g_new0(uint8_t, *resultlen);
} else if (*resultlen != qcrypto_hash_alg_map[alg].len) {
error_setg(errp,
"Result buffer size %zu is smaller than hash %zu",
*resultlen, qcrypto_hash_alg_map[alg].len);
return -1;
}
qcrypto_hash_alg_map[alg].result(&ctx, *resultlen, *result);
return 0;
}

View File

@@ -22,16 +22,14 @@
#include "qapi/error.h"
#include "crypto/hash.h"
#ifdef CONFIG_GNUTLS_HASH
#include <gnutls/gnutls.h>
#include <gnutls/crypto.h>
#endif
static size_t qcrypto_hash_alg_size[QCRYPTO_HASH_ALG__MAX] = {
[QCRYPTO_HASH_ALG_MD5] = 16,
[QCRYPTO_HASH_ALG_SHA1] = 20,
[QCRYPTO_HASH_ALG_SHA224] = 28,
[QCRYPTO_HASH_ALG_SHA256] = 32,
[QCRYPTO_HASH_ALG_SHA384] = 48,
[QCRYPTO_HASH_ALG_SHA512] = 64,
[QCRYPTO_HASH_ALG_RIPEMD160] = 20,
};
size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg)
@@ -41,105 +39,6 @@ size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg)
}
#ifdef CONFIG_GNUTLS_HASH
static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
[QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5,
[QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1,
[QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256,
};
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
{
if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map)) {
return true;
}
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov,
size_t niov,
uint8_t **result,
size_t *resultlen,
Error **errp)
{
int i, ret;
gnutls_hash_hd_t dig;
if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map)) {
error_setg(errp,
"Unknown hash algorithm %d",
alg);
return -1;
}
ret = gnutls_hash_init(&dig, qcrypto_hash_alg_map[alg]);
if (ret < 0) {
error_setg(errp,
"Unable to initialize hash algorithm: %s",
gnutls_strerror(ret));
return -1;
}
for (i = 0; i < niov; i++) {
ret = gnutls_hash(dig, iov[i].iov_base, iov[i].iov_len);
if (ret < 0) {
error_setg(errp,
"Unable process hash data: %s",
gnutls_strerror(ret));
goto error;
}
}
ret = gnutls_hash_get_len(qcrypto_hash_alg_map[alg]);
if (ret <= 0) {
error_setg(errp,
"Unable to get hash length: %s",
gnutls_strerror(ret));
goto error;
}
if (*resultlen == 0) {
*resultlen = ret;
*result = g_new0(uint8_t, *resultlen);
} else if (*resultlen != ret) {
error_setg(errp,
"Result buffer size %zu is smaller than hash %d",
*resultlen, ret);
goto error;
}
gnutls_hash_deinit(dig, *result);
return 0;
error:
gnutls_hash_deinit(dig, NULL);
return -1;
}
#else /* ! CONFIG_GNUTLS_HASH */
gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg G_GNUC_UNUSED)
{
return false;
}
int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
const struct iovec *iov G_GNUC_UNUSED,
size_t niov G_GNUC_UNUSED,
uint8_t **result G_GNUC_UNUSED,
size_t *resultlen G_GNUC_UNUSED,
Error **errp)
{
error_setg(errp,
"Hash algorithm %d not supported without GNUTLS",
alg);
return -1;
}
#endif /* ! CONFIG_GNUTLS_HASH */
int qcrypto_hash_bytes(QCryptoHashAlgorithm alg,
const char *buf,
size_t len,

View File

@@ -59,8 +59,7 @@
#if (defined(CONFIG_GCRYPT) && \
(!defined(CONFIG_GNUTLS) || \
!defined(GNUTLS_VERSION_NUMBER) || \
(GNUTLS_VERSION_NUMBER < 0x020c00)) && \
(LIBGNUTLS_VERSION_NUMBER < 0x020c00)) && \
(!defined(GCRYPT_VERSION_NUMBER) || \
(GCRYPT_VERSION_NUMBER < 0x010600)))
#define QCRYPTO_INIT_GCRYPT_THREADS

View File

@@ -18,8 +18,8 @@
*
*/
#ifndef QCRYPTO_IVGEN_PRIV_H__
#define QCRYPTO_IVGEN_PRIV_H__
#ifndef QCRYPTO_IVGENPRIV_H
#define QCRYPTO_IVGENPRIV_H
#include "crypto/ivgen.h"
@@ -46,4 +46,4 @@ struct QCryptoIVGen {
};
#endif /* QCRYPTO_IVGEN_PRIV_H__ */
#endif /* QCRYPTO_IVGENPRIV_H */

View File

@@ -19,9 +19,9 @@
*/
#include "qemu/osdep.h"
#include <gcrypt.h>
#include "qapi/error.h"
#include "crypto/pbkdf.h"
#include "gcrypt.h"
bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash)
{

Some files were not shown because too many files have changed in this diff Show More