Compare commits

...

364 Commits

Author SHA1 Message Date
Thomas Huth
bcf19777df audio/sdlaudio: Allow audio playback with SDL2
When compiling with SDL2, the semaphore trick used in sdlaudio.c
does not work - QEMU locks up completely in this case. To avoid
the hang and get at least some audio playback up and running (it's
a little bit crackling, but better than nothing), we can use the
SDL locking functions SDL_LockAudio() and SDL_UnlockAudio() to sync
with the sound playback thread instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1485852398-2327-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
1ffc266539 audio: make audio poll timer deterministic
This patch changes resetting strategy of the audio polling timer.
It does not change expiration time if the timer is already set.
This patch is needed to make this timer deterministic and to use execution
record/replay for audio devices.

audio_reset_timer is used in the function audio_vm_change_state_handler.
Therefore every time VM is stopped or restarted the timer will be reset
to new timeout. Virtual clock does not proceed while VM is stopped.
Therefore there is no need in resetting the timeout when VM restarts.

v2: updated commit message
v3: now using timer_mod_anticipate function (as suggested by Yurii Zubrytskyi)

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170214071510.6112.76764.stgit@PASHA-ISP
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:12:03 +01:00
Pavel Dovgalyuk
3d4d16f4dc replay: add record/replay for audio passthrough
This patch adds recording and replaying audio data. Is saves synchronization
information for audio out and inputs from the microphone.

v2: removed unneeded whitespace change

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-id: 20170202055054.4848.94901.stgit@PASHA-ISP.lan02.inno

[ kraxel: add qemu/error-report.h include to fix osx build failure ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-03-01 15:11:44 +01:00
Marc-André Lureau
e7c83a885f vhost-user: delay vhost_user_stop
Since commit b0a335e351, a socket write
may trigger a disconnect events, calling vhost_user_stop() and clearing
all the vhost_dev strutures holding data that vhost.c functions expect
to remain valid. Delay the cleanup to keep the vhost_dev structure
valid during the vhost.c functions.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170227104956.24729-1-marcandre.lureau@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 19:11:15 +00:00
Peter Maydell
9514f2648c Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
This pull request brings:
- a fix to a minor bug reported by Coverity
- throttling support in the local backend (command line only)

# gpg: Signature made Tue 28 Feb 2017 09:32:30 GMT
# gpg:                using DSA key 0x02FC3AEB0101DBC2
# gpg: Good signature from "Greg Kurz <groug@kaod.org>"
# gpg:                 aka "Greg Kurz <groug@free.fr>"
# gpg:                 aka "Greg Kurz <gkurz@linux.vnet.ibm.com>"
# gpg:                 aka "Gregory Kurz (Groug) <groug@free.fr>"
# gpg:                 aka "[jpeg image of size 3330]"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2BD4 3B44 535E C0A7 9894  DBA2 02FC 3AEB 0101 DBC2

* remotes/gkurz/tags/for-upstream:
  throttle: factor out duplicate code
  fsdev: add IO throttle support to fsdev devices
  9pfs: fix v9fs_lock error case

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 17:39:49 +00:00
Peter Maydell
7d1730b7d9 Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into staging
trivial patches for 2017-02-28

# gpg: Signature made Tue 28 Feb 2017 06:43:55 GMT
# gpg:                using RSA key 0x701B4F6B1A693E59
# gpg: Good signature from "Michael Tokarev <mjt@tls.msk.ru>"
# gpg:                 aka "Michael Tokarev <mjt@corpit.ru>"
# gpg:                 aka "Michael Tokarev <mjt@debian.org>"
# Primary key fingerprint: 6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 8044 65C5
#      Subkey fingerprint: 7B73 BAD6 8BE7 A2C2 8931  4B22 701B 4F6B 1A69 3E59

* remotes/mjt/tags/trivial-patches-fetch:
  syscall: fixed mincore(2) not failing with ENOMEM
  hw/acpi/tco.c: fix tco timer stop
  lm32: milkymist-tmu2: fix a third integer overflow
  qemu-options.hx: add missing id=chr0 chardev argument in vhost-user example
  Update copyright year
  tests/prom-env: Enable the test for the sun4u machine, too
  cadence_gem: Remove unused parameter debug message
  register: fix incorrect read mask
  ide: remove undefined behavior in ide-test
  CODING_STYLE: Mention preferred comment form
  hw/core/register: Mark the device with cannot_instantiate_with_device_add_yet
  hw/core/or-irq: Mark the device with cannot_instantiate_with_device_add_yet
  softfloat: Use correct type in float64_to_uint64_round_to_zero()
  target/s390x: Fix typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 16:22:41 +00:00
Peter Maydell
1bbe5dc66b Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20170228' into staging
target-arm queue:
 * raspi2: implement RNG module
 * raspi2: implement new SD card controller (but don't wire it up)
 * sdhci: bugfixes for block transfers
 * virt: fix cpu object reference leak
 * Add missing fp_access_check() to aarch64 crypto instructions
 * cputlb: Don't assume do_unassigned_access() never returns
 * virt: Add a user option to disallow ITS instantiation
 * i.MX timers: fix reset handling
 * ARMv7M NVIC: rewrite to fix broken priority handling and masking
 * exynos: Fix proper mapping of CPUs by providing real cluster ID
 * exynos: Fix Linux kernel division by zero for PLLs

# gpg: Signature made Tue 28 Feb 2017 12:40:51 GMT
# gpg:                using RSA key 0x3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20170228: (27 commits)
  hw/arm/exynos: Fix proper mapping of CPUs by providing real cluster ID
  hw/arm/exynos: Fix Linux kernel division by zero for PLLs
  bcm2835_sdhost: add bcm2835 sdhost controller
  armv7m: Allow SHCSR writes to change pending and active bits
  armv7m: Raise correct kind of UsageFault for attempts to execute ARM code
  armv7m: Check exception return consistency
  armv7m: Extract "exception taken" code into functions
  armv7m: VECTCLRACTIVE and VECTRESET are UNPREDICTABLE
  armv7m: Simpler and faster exception start
  armv7m: Remove unused armv7m_nvic_acknowledge_irq() return value
  armv7m: Escalate exceptions to HardFault if necessary
  arm: gic: Remove references to NVIC
  armv7m: Fix condition check for taking exceptions
  armv7m: Rewrite NVIC to not use any GIC code
  armv7m: Implement reading and writing of PRIGROUP
  armv7m: Rename nvic_state to NVICState
  ARM i.MX timers: fix reset handling
  hw/arm/virt: Add a user option to disallow ITS instantiation
  cputlb: Don't assume do_unassigned_access() never returns
  Add missing fp_access_check() to aarch64 crypto instructions
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 14:50:17 +00:00
Peter Maydell
c8c0a1a784 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 28 Feb 2017 04:34:34 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  iscsi: add missing colons to the qapi docs
  block/mirror: fix broken sparseness detection

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 13:41:03 +00:00
Peter Maydell
a57aaa4e74 Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20170228' into staging
Enable MTTCG for Alpha guest

# gpg: Signature made Tue 28 Feb 2017 00:43:17 GMT
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-axp-20170228:
  target/alpha: Enable MTTCG by default

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 13:01:50 +00:00
Krzysztof Kozlowski
f3a6339a5b hw/arm/exynos: Fix proper mapping of CPUs by providing real cluster ID
The Exynos4210 has cluster ID 0x9 in its MPIDR register (raw value
0x8000090x).  If this cluster ID is not provided, then Linux kernel
cannot map DeviceTree nodes to MPIDR values resulting in kernel
warning and lack of any secondary CPUs:

    DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map
    ...
    smp: Bringing up secondary CPUs ...
    smp: Brought up 1 node, 1 CPU
    SMP: Total of 1 processors activated (24.00 BogoMIPS).

Provide a cluster ID so Linux will see proper MPIDR and will try to
bring the secondary CPU online.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170226200142.31169-2-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:20 +00:00
Krzysztof Kozlowski
1e0228fd20 hw/arm/exynos: Fix Linux kernel division by zero for PLLs
Without any clock controller, the Linux kernel was hitting division by
zero during boot or with clk_summary:
[    0.000000] [<c031054c>] (unwind_backtrace) from [<c030ba6c>] (show_stack+0x10/0x14)
[    0.000000] [<c030ba6c>] (show_stack) from [<c05b2660>] (dump_stack+0x88/0x9c)
[    0.000000] [<c05b2660>] (dump_stack) from [<c05b11a4>] (Ldiv0+0x8/0x10)
[    0.000000] [<c05b11a4>] (Ldiv0) from [<c06ad1e0>] (samsung_pll45xx_recalc_rate+0x58/0x74)
[    0.000000] [<c06ad1e0>] (samsung_pll45xx_recalc_rate) from [<c0692ec0>] (clk_register+0x39c/0x63c)
[    0.000000] [<c0692ec0>] (clk_register) from [<c125d360>] (samsung_clk_register_pll+0x2e0/0x3d4)
[    0.000000] [<c125d360>] (samsung_clk_register_pll) from [<c125d7e8>] (exynos4_clk_init+0x1b0/0x5e4)
[    0.000000] [<c125d7e8>] (exynos4_clk_init) from [<c12335f4>] (of_clk_init+0x17c/0x210)
[    0.000000] [<c12335f4>] (of_clk_init) from [<c1204700>] (time_init+0x24/0x2c)
[    0.000000] [<c1204700>] (time_init) from [<c1200b2c>] (start_kernel+0x24c/0x38c)
[    0.000000] [<c1200b2c>] (start_kernel) from [<4020807c>] (0x4020807c)

Provide stub for clock controller returning reset values for PLLs.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Message-id: 20170226200142.31169-1-krzk@kernel.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:20 +00:00
Clement Deschamps
43ddc182e2 bcm2835_sdhost: add bcm2835 sdhost controller
This adds the BCM2835 SDHost controller from Arasan.

Signed-off-by: Clement Deschamps <clement.deschamps@antfield.fr>
Message-id: 20170224164021.9066-2-clement.deschamps@antfield.fr
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
5db53e353d armv7m: Allow SHCSR writes to change pending and active bits
Implement the NVIC SHCSR write behaviour which allows pending and
active status of some exceptions to be changed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
e13886e3a7 armv7m: Raise correct kind of UsageFault for attempts to execute ARM code
M profile doesn't implement ARM, and the architecturally required
behaviour for attempts to execute with the Thumb bit clear is to
generate a UsageFault with the CFSR INVSTATE bit set.  We were
incorrectly implementing this as generating an UNDEFINSTR UsageFault;
fix this.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
aa488fe3bb armv7m: Check exception return consistency
Implement the exception return consistency checks
described in the v7M pseudocode ExceptionReturn().

Inspired by a patch from Michael Davidsaver's series, but
this is a reimplementation from scratch based on the
ARM ARM pseudocode.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:19 +00:00
Peter Maydell
39ae2474e3 armv7m: Extract "exception taken" code into functions
Extract the code from the tail end of arm_v7m_do_interrupt() which
enters the exception handler into a pair of utility functions
v7m_exception_taken() and v7m_push_stack(), which correspond roughly
to the pseudocode PushStack() and ExceptionTaken().

This also requires us to move the arm_v7m_load_vector() utility
routine up so we can call it.

Handling illegal exception returns has some cases where we want to
take a UsageFault either on an existing stack frame or with a new
stack frame but with a specific LR value, so we want to be able to
call these without having to go via arm_v7m_cpu_do_interrupt().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
14790f730a armv7m: VECTCLRACTIVE and VECTRESET are UNPREDICTABLE
The VECTCLRACTIVE and VECTRESET bits in the AIRCR are both
documented as UNPREDICTABLE if you write a 1 to them when
the processor is not halted in Debug state (ie stopped
and under the control of an external JTAG debugger).
Since we don't implement Debug state or emulated JTAG
these bits are always UNPREDICTABLE for us. Instead of
logging them as unimplemented we can simply log writes
as guest errors and ignore them.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: change extracted from another patch; commit message
 constructed from scratch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
a25dc805e2 armv7m: Simpler and faster exception start
All the places in armv7m_cpu_do_interrupt() which pend an
exception in the NVIC are doing so for synchronous
exceptions. We know that we will always take some
exception in this case, so we can just acknowledge it
immediately, rather than returning and then immediately
being called again because the NVIC has raised its outbound
IRQ line.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: tweaked commit message; added DEBUG to the set of
exceptions we handle immediately, since it is synchronous
when it results from the BKPT instruction]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Peter Maydell
a5d8235545 armv7m: Remove unused armv7m_nvic_acknowledge_irq() return value
Having armv7m_nvic_acknowledge_irq() return the new value of
env->v7m.exception and its one caller assign the return value
back to env->v7m.exception is pointless. Just make the return
type void instead.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:18 +00:00
Michael Davidsaver
a73c98e159 armv7m: Escalate exceptions to HardFault if necessary
The v7M exception architecture requires that if a synchronous
exception cannot be taken immediately (because it is disabled
or at too low a priority) then it should be escalated to
HardFault (and the HardFault exception is then taken).
Implement this escalation logic.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: extracted from another patch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Michael Davidsaver
7c14b3ac07 arm: gic: Remove references to NVIC
Now that the NVIC is its own separate implementation, we can
clean up the GIC code by removing REV_NVIC and conditionals
which use it.

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Peter Maydell
7ecdaa4a96 armv7m: Fix condition check for taking exceptions
The M profile condition for when we can take a pending exception or
interrupt is not the same as that for A/R profile.  The code
originally copied from the A/R profile version of the
cpu_exec_interrupt function only worked by chance for the
very simple case of exceptions being masked by PRIMASK.
Replace it with a call to a function in the NVIC code that
correctly compares the priority of the pending exception
against the current execution priority of the CPU.

[Michael Davidsaver's patchset had a patch to do something
similar but the implementation ended up being a rewrite.]

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Michael Davidsaver
da6d674e50 armv7m: Rewrite NVIC to not use any GIC code
Despite some superficial similarities of register layout, the
M-profile NVIC is really very different from the A-profile GIC.
Our current attempt to reuse the GIC code means that we have
significant bugs in our NVIC.

Implement the NVIC as an entirely separate device, to give
us somewhere we can get the behaviour correct.

This initial commit does not attempt to implement exception
priority escalation, since the GIC-based code didn't either.
It does fix a few bugs in passing:
 * ICSR.RETTOBASE polarity was wrong and didn't account for
   internal exceptions
 * ICSR.VECTPENDING was 16 too high if the pending exception
   was for an external interrupt
 * UsageFault, BusFault and MemFault were not disabled on reset
   as they are supposed to be

Signed-off-by: Michael Davidsaver <mdavidsaver@gmail.com>
[PMM: reworked, various bugs and stylistic cleanups]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:17 +00:00
Peter Maydell
1004102a77 armv7m: Implement reading and writing of PRIGROUP
Add a state field for the v7M PRIGROUP register and implent
reading and writing it. The current NVIC doesn't honour
the values written, but the new version will.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:16 +00:00
Peter Maydell
f797c07507 armv7m: Rename nvic_state to NVICState
Rename the nvic_state struct to NVICState, to match
our naming conventions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2017-02-28 12:08:16 +00:00
Kurban Mallachiev
c98c9eba88 ARM i.MX timers: fix reset handling
The i.MX timer device can be reset by writing to the SWR bit
of the CR register. This has to behave differently from hard
(power-on) reset because it does not reset all of the bits
in the CR register.

We were incorrectly implementing soft reset and hard reset
the same way, and in addition had a logic error which meant
that we were clearing the bits that soft-reset is supposed
to preserve and not touching the bits that soft-reset clears.
This was not correct behaviour for either kind of reset.

Separate out the soft reset and hard reset code paths, and
correct the handling of reset of the CR register so that it
is correct in both cases.

Signed-off-by: Kurban Mallachiev <mallachiev@ispras.ru>
[PMM: rephrased commit message, spacing on operators;
 use bool rather than int for is_soft_reset]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:16 +00:00
Eric Auger
ccc11b0279 hw/arm/virt: Add a user option to disallow ITS instantiation
In 2.9 ITS will block save/restore and migration use cases. As such,
let's introduce a user option that allows to turn its instantiation
off, along with GICv3. With the "its" option turned false, migration
will be possible, obviously at the expense of MSI support (with GICv3).

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1487681108-14452-1-git-send-email-eric.auger@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:16 +00:00
Peter Maydell
44d7ce0ef3 cputlb: Don't assume do_unassigned_access() never returns
In get_page_addr_code(), if the guest PC doesn't correspond to RAM
then we currently run the CPU's do_unassigned_access() hook if it has
one, and otherwise we give up and exit QEMU with a more-or-less
useful message.  This code assumes that the do_unassigned_access hook
will never return, because if it does then we'll plough on attempting
to use a non-RAM TLB entry to get a RAM address and will abort() in
qemu_ram_addr_from_host_nofail().  Unfortunately some CPU
implementations of this hook do return: Microblaze, SPARC and the ARM
v7M.

Change the code to call report_bad_exec() if the hook returns, as
well as if it didn't have one.  This means we can tidy it up to use
the cpu_unassigned_access() function which wraps the "get the CPU
class and call the hook if it has one" work, since we aren't trying
to distinguish "no hook" from "hook existed and returned" any more.

This brings the handling of this hook into line with the handling
used for data accesses, where "hook returned" is treated the
same as "no hook existed" and gets you the default behaviour.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-28 12:08:15 +00:00
Nick Reilly
a4f5c5b723 Add missing fp_access_check() to aarch64 crypto instructions
The aarch64 crypto instructions for AES and SHA are missing the
check for if the FPU is enabled.

Signed-off-by: Nick Reilly <nreilly@blackberry.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Igor Mammedov
dbb74759fa hw/arm/virt: fix cpu object reference leak
object_new(FOO) returns an object with ref_cnt == 1
and following
  object_property_set_bool(cpuobj, true, "realized", NULL)
set parent of cpuobj to '/machine/unattached' which makes
ref_cnt == 2.

Since machvirt_init() doesn't take ownership of cpuobj
returned by object_new() it should explicitly drop
reference to cpuobj when dangling pointer is about to
go out of scope like it's done pc_new_cpu() to avoid
object leak.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 1487253461-269218-1-git-send-email-imammedo@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Prasad J Pandit
241999bf4c sd: sdhci: Remove block count enable check in single block transfers
In SDHCI protocol, the 'Block count enable' bit of the Transfer
Mode register is relevant only in multi block transfers. We need
not check it in single block transfers.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-5-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:15 +00:00
Prasad J Pandit
45ba9f761b sd: sdhci: conditionally invoke multi block transfer
In sdhci_write invoke multi block transfer if it is enabled
in the transfer mode register 's->trnmod'.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-4-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Prasad J Pandit
6e86d90352 sd: sdhci: check transfer mode register in multi block transfer
In the SDHCI protocol, the transfer mode register value
is used during multi block transfer to check if block count
register is enabled and should be updated. Transfer mode
register could be set such that, block count register would
not be updated, thus leading to an infinite loop. Add check
to avoid it.

Reported-by: Wjjzhang <wjjzhang@tencent.com>
Reported-by: Jiang Xin <jiangxin1@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170214185225.7994-3-ppandit@redhat.com
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Prasad J Pandit
8b20aefac4 sd: sdhci: mask transfer mode register value
In SDHCI protocol, the transfer mode register is defined
to be of 6 bits. Mask its value with '0x0037' so that an
invalid value could not be assigned.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 20170214185225.7994-2-ppandit@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:14 +00:00
Peter Maydell
373442ea3a bcm2835_rng: Use qcrypto_random_bytes() rather than rand()
Switch to using qcrypto_random_bytes() rather than rand() as
our source of randomness for the BCM2835 RNG.

If qcrypto_random_bytes() fails, we don't want to return the guest a
non-random value in case they're really using it for cryptographic
purposes, so the best we can do is a fatal error.  This shouldn't
happen unless something's broken, though.

In theory we could implement this device's full FIFO and interrupt
semantics and then just stop filling the FIFO.  That's a lot of work,
though, and doesn't really give a very nice diagnostic to the user
since the guest will just seem to hang.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-28 12:08:14 +00:00
Marcin Chojnacki
54a5ba13a9 target-arm: Implement BCM2835 hardware RNG
Recent vanilla Raspberry Pi kernels started to make use of
the hardware random number generator in BCM2835 SoC. As a
result, those kernels wouldn't work anymore under QEMU
but rather just freeze during the boot process.

This patch implements a trivial BCM2835 compatible RNG,
and adds it as a peripheral to BCM2835 platform, which
allows to boot a vanilla Raspberry Pi kernel under Qemu.

Changes since v1:
 * Prevented guest from writing [31..20] bits in rng_status
 * Removed redundant minimum_version_id_old
 * Added field entries for the state
 * Changed realize function to reset

Signed-off-by: Marcin Chojnacki <marcinch7@gmail.com>
Message-id: 20170210210857.47893-1-marcinch7@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:08:13 +00:00
Peter Maydell
105d86ff38 Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-upstream-pull-request' into staging
# gpg: Signature made Mon 27 Feb 2017 22:15:47 GMT
# gpg:                using RSA key 0xF30C38BD3F2FBE3C
# gpg: Good signature from "Laurent Vivier <lvivier@redhat.com>"
# gpg:                 aka "Laurent Vivier <laurent@vivier.eu>"
# gpg:                 aka "Laurent Vivier (Red Hat) <lvivier@redhat.com>"
# Primary key fingerprint: CD2F 75DD C8E3 A4DC 2E4F  5173 F30C 38BD 3F2F BE3C

* remotes/vivier2/tags/linux-user-for-upstream-pull-request:
  syscall: fixed mincore(2) not failing with ENOMEM
  linux-user: fix do_rt_sigreturn on m68k linux userspace emulation
  linux-user: correctly manage SR in ucontext
  linux-user: Add signal handling support for x86_64
  linux-user: Add sockopts for IPv6 ping and IPv6 traceroute
  linux-user: fix fork()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 12:03:36 +00:00
Pradeep Jagadeesh
a2a7862ca9 throttle: factor out duplicate code
This patch removes the redundant throttle code that was present in
block and fsdev device files. Now the common code is moved
to a single file.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
(fix indent nit, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Pradeep Jagadeesh
b8bbdb886e fsdev: add IO throttle support to fsdev devices
This patchset adds the throttle support for the 9p-local driver.
For now this functionality can be enabled only through qemu cli options.
QMP interface and support to other drivers need further extensions.
To make it simple for other 9p drivers, the throttle code has been put in
separate files.

Signed-off-by: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
(pass extra NULL CoMutex * argument to qemu_co_queue_wait(),
 added options to qemu-options.hx, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Paolo Bonzini
4bae2b397f 9pfs: fix v9fs_lock error case
In this case, we are marshaling an error status instead of the errno value.
Reorganize the out and out_nofid labels to look like all the other cases.
Coverity reports this because the "err = -ENOENT" and "err = -EINVAL"
assignments above are dead, overwritten by the call to pdu_marshal.

(Coverity issues CID1348512 and CID1348513)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(also open-coded the success path since locking is a nop for us, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>
2017-02-28 10:31:46 +01:00
Peter Maydell
6181478f63 Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Mon 27 Feb 2017 16:33:23 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  tests-aio-multithread: use atomic_read properly
  iscsi: do not use aio_context_acquire/release
  nfs: do not use aio_context_acquire/release
  curl: do not use aio_context_acquire/release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-28 08:46:03 +00:00
Franklin \"Snaipe\" Mathieu
f5507e0448 syscall: fixed mincore(2) not failing with ENOMEM
The current implementation of the mincore(2) syscall sets errno to
EFAULT when the region identified by the first two parameters is
invalid.

This goes against the man page specification, where mincore(2) should
only fail with EFAULT when the third parameter is an invalid address;
and fail with ENOMEM when the checked region does not point to mapped
memory.

Signed-off-by: Franklin "Snaipe" Mathieu <snaipe@diacritic.io>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Igor Pavlikevich
6c608953a5 hw/acpi/tco.c: fix tco timer stop
TCO timer does not actually stop

Signed-off-by: Igor Pavlikevich <ipavlikevich@gmail.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Peter Maydell
3d74ee7dca lm32: milkymist-tmu2: fix a third integer overflow
Don't truncate the multiplication and do a 64 bit one instead
because the result is stored in a 64 bit variable.

This fixes a similar coverity warning to commits 237a8650d6 and
4382fa6554, in a similar way, and is the final third of the fix for
coverity CID 1167561 (hopefully!).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Acked-by: Michael Walle <michael@walle.cc>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Vincenzo Maffione
79cad2faac qemu-options.hx: add missing id=chr0 chardev argument in vhost-user example
In the vhost-user example, a chardev with id chr0 is referenced by the
vhost-user net backend, but the id is not specified in the chardev option.

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Marc-André Lureau
d34d5b3bbd Update copyright year
It's still time to wish happy new year!

The Year of the Rooster will begin on January 28, 2017!

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Thomas Huth
6b591ad613 tests/prom-env: Enable the test for the sun4u machine, too
The 32-bit TCG bug has been fixed a while ago, so we can enable
this test for sparc64 now, too. Unfortunately, OpenBIOS does not
work with the sun4v machine anymore (it needs to catch up with the
improved emulation), so we can only enable this test for the sun4u
machine right now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:39 +03:00
Fam Zheng
8ea1d05632 cadence_gem: Remove unused parameter debug message
Reported by cppcheck.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Philippe Mathieu-Daudé
4729b3a41d register: fix incorrect read mask
The register_read() and register_write() functions expect a bitmask argument.
To avoid duplicated code, a new inlined function register_enabled_mask() is
introduced.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
John Snow
6048018ef6 ide: remove undefined behavior in ide-test
trivial: initialize the dirty buffer with a random-ish byte.
Stops valgrind from whining about uninitialized buffers.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Peter Maydell
25ac5bbec4 CODING_STYLE: Mention preferred comment form
Our defacto coding style strongly prefers /* */ style comments
over the single-line // style, and checkpatch enforces this,
but we don't actually document this. Mention it in CODING_STYLE.

Suggested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Thomas Huth
a6e3707ece hw/core/register: Mark the device with cannot_instantiate_with_device_add_yet
The "qemu,register" device needs to be wired up in source code, there
is no way the user can make any real use of this device with the
"-device" parameter or the "device_add" monitor command yet.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Thomas Huth
a70716eb2c hw/core/or-irq: Mark the device with cannot_instantiate_with_device_add_yet
The "or-irq" device needs to be wired up in source code, there is no
way the user can make any real use of this device with the "-device"
parameter or the "device_add" monitor command yet.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Peter Maydell
d000b477f2 softfloat: Use correct type in float64_to_uint64_round_to_zero()
In float64_to_uint64_round_to_zero() a typo meant that we were
taking the uint64_t return value from float64_to_uint64() and
putting it into an int64_t variable before returning it as
uint64_t again. Use uint64_t instead of pointlessly casting it
back and forth to int64_t.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Stefan Weil
8dc52350f9 target/s390x: Fix typo
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2017-02-28 09:03:38 +03:00
Jeff Cody
51654aa52a iscsi: add missing colons to the qapi docs
The missing colons make the iscsi part of the documentation not render
quite as nicely, so add those in.

Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-27 23:33:41 -05:00
Richard Henderson
5ee4f3c2c7 target/alpha: Enable MTTCG by default
Alpha has a weak memory ordering and issues all of the required barriers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-02-28 11:41:46 +11:00
Franklin \"Snaipe\" Mathieu
98a3331a55 syscall: fixed mincore(2) not failing with ENOMEM
The current implementation of the mincore(2) syscall sets errno to
EFAULT when the region identified by the first two parameters is
invalid.

This goes against the man page specification, where mincore(2) should
only fail with EFAULT when the third parameter is an invalid address;
and fail with ENOMEM when the checked region does not point to mapped
memory.

Signed-off-by: Franklin "Snaipe" Mathieu <snaipe@diacritic.io>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170217085800.28873-2-snaipe@diacritic.io>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Michael Karcher
59ebb6e451 linux-user: fix do_rt_sigreturn on m68k linux userspace emulation
do_rt_sigreturn uses an uninitialised local variable instead of fetching
the old signal mask directly from the signal frame when restoring the mask,
so the signal mask is undefined after do_rt_sigreturn. As the signal
frame data is in target-endian order, target_to_host_sigset instead of
target_to_host_sigset_internal is required.

do_sigreturn is correct in using target_to_host_sigset_internal, because
get_user already did the endianness conversion.

Signed-off-by: Michael Karcher <karcher@physik.fu-berlin.de>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170225110517.2832-3-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Laurent Vivier
3219de458c linux-user: correctly manage SR in ucontext
Use cpu_m68k_get_ccr()/cpu_m68k_set_ccr() to setup and restore correctly
the value of SR in the ucontext structure

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170225110517.2832-2-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Pranith Kumar
1c1df0198b linux-user: Add signal handling support for x86_64
Note that x86_64 has only _rt signal handlers. This implementation
attempts to share code with the x86_32 implementation.

CC: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Allan Wirth <awirth@akamai.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Message-Id: <20170226165345.8757-1-bobby.prani@gmail.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Helge Deller
ee1ac3a182 linux-user: Add sockopts for IPv6 ping and IPv6 traceroute
Add the neccessary sockopts for ping and traceroute on IPv6.

This fixes the following qemu warnings with IPv6:
Unsupported ancillary data: 0/2
Unsupported ancillary data: 0/11
Unsupported ancillary data: 41/25
Unsupported setsockopt level=0 optname=12
Unsupported setsockopt level=41 optname=16
Unsupported setsockopt level=41 optname=25
Unsupported setsockopt level=41 optname=50
Unsupported setsockopt level=41 optname=51
Unsupported setsockopt level=41 optname=8
Unsupported setsockopt level=58 optname=1

Tested with hppa-linux-user (big-endian) on x86_64 (little-endian).

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170218223130.GA25278@ls3530.fritz.box>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Laurent Vivier
7eddb5ddac linux-user: fix fork()
Since commit 5ea2fc8 ("linux-user: Sanity check clone flags"),
trying to run fork() fails with old distro on some architectures.

This is the case with HP-PA and Debian 5 (Lenny).

It fails on:

         if ((flags & CSIGNAL) != TARGET_SIGCHLD) {
             return -TARGET_EINVAL;
         }

because flags is 17, whereas on HP-PA, SIGCHLD is 18.
17 is the SIGCHLD value of my host (x86_64).

It appears that for TARGET_NR_fork and TARGET_NR_vfork, QEMU calls
do_fork() with SIGCHLD instead of TARGET_SIGCHLD.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170216173707.16209-1-laurent@vivier.eu>
2017-02-27 23:10:02 +01:00
Peter Maydell
9b9fbe8a4e Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170227-1' into staging
gtk: fix kbd on xwayland
vnc: fix double free issues
opengl improvements

# gpg: Signature made Mon 27 Feb 2017 16:11:30 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170227-1:
  vnc: fix double free issues
  spice: add display & head options
  ui: Use XkbGetMap and XkbGetNames instead of XkbGetKeyboard
  gtk-egl: add scanout_disable support
  sdl2: add scanout_disable support
  spice: add scanout_disable support
  virtio-gpu: use dpy_gl_scanout_disable
  console: add dpy_gl_scanout_disable
  console: rename dpy_gl_scanout to dpy_gl_scanout_texture

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 19:19:46 +00:00
John Snow
39c11580f3 block/mirror: fix broken sparseness detection
int64_t is in all likelihood the actual scalar type we want.
Yep, really.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1219541

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-27 14:02:31 -05:00
Peter Maydell
8f2d7c3411 Merge remote-tracking branch 'remotes/berrange/tags/pull-qcrypto-2017-02-27-1' into staging
Merge qcrypto 2017/02/27 v1

# gpg: Signature made Mon 27 Feb 2017 13:37:34 GMT
# gpg:                using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg:                 aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E  8E3F BE86 EBB4 1510 4FDF

* remotes/berrange/tags/pull-qcrypto-2017-02-27-1:
  crypto: assert cipher algorithm is always valid
  crypto: fix leak in ivgen essiv init

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 15:33:21 +00:00
Gerd Hoffmann
2dc120beb8 vnc: fix double free issues
Reported by Coverity: CID 1371242, 1371243, 1371244.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487682332-29154-1-git-send-email-kraxel@redhat.com
2017-02-27 16:22:01 +01:00
Gerd Hoffmann
8bf69b499a spice: add display & head options
This allows to specify display and head to use, simliar to vnc.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1487663858-11731-1-git-send-email-kraxel@redhat.com
2017-02-27 16:21:23 +01:00
Daniel P. Berrange
857e479552 ui: Use XkbGetMap and XkbGetNames instead of XkbGetKeyboard
XkbGetKeyboard does not work in XWayland and even on non-Wayland
X11 servers its use is discouraged:

  https://bugs.freedesktop.org/show_bug.cgi?id=89240

This resolves a problem whereby QEMU prints

  "could not lookup keycode name"

on startup when running under XWayland. Keymap handling is
however still broken after this commit, since Xwayland is
reporting a keymap we can't handle

  "unknown keycodes `(unnamed)', please report to qemu-devel@nongnu.org"

NB, native Wayland support (which is the default under GTK3) is
not affected - only XWayland (which can be requested with GDK_BACKEND
on GTK3, and is the only option for GTK2).

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170227132343.30824-1-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-27 16:19:47 +01:00
Gerd Hoffmann
543a7a161f gtk-egl: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-7-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
db6cdfbeba sdl2: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-6-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
46ffd0c031 spice: add scanout_disable support
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-5-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
975896fc88 virtio-gpu: use dpy_gl_scanout_disable
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-4-git-send-email-kraxel@redhat.com
2017-02-27 16:15:29 +01:00
Gerd Hoffmann
eaa92c76ce console: add dpy_gl_scanout_disable
Helper function (and DisplayChangeListenerOps ptr) to disable scanouts.
Replaces using dpy_gl_scanout_texture with 0x0 size and no texture
specified.

Allows cleanups to make the io and gfx emulation code more readable.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-3-git-send-email-kraxel@redhat.com
2017-02-27 16:15:28 +01:00
Gerd Hoffmann
f4c36bdab6 console: rename dpy_gl_scanout to dpy_gl_scanout_texture
We'll add a variant which accepts dmabufs soon.  Change
the name so we can easily disturgish the two variants.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487669841-13668-2-git-send-email-kraxel@redhat.com
2017-02-27 16:15:28 +01:00
Paolo Bonzini
1ab17f9f5c tests-aio-multithread: use atomic_read properly
nodes[id].next is written by other threads.  If atomic_read is not used
(matching atomic_set in mcs_mutex_lock!) the compiler can optimize the
whole "if" away!

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Greg Kurz <groug@kaod.org>
Message-id: 20170227111726.9237-1-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 14:00:53 +00:00
Paolo Bonzini
d045c466d9 iscsi: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libiscsi calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:58:58 +00:00
Paolo Bonzini
37d1e4d9bf nfs: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect libnfs calls with a QemuMutex.  Callbacks are invoked
using bottom halves, so we don't even have to drop it around
callback invocations.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:58:53 +00:00
Prasad J Pandit
32c813e6c2 crypto: assert cipher algorithm is always valid
Crypto routines 'qcrypto_cipher_get_block_len' and
'qcrypto_cipher_get_key_len' return non-zero cipher block and key
lengths from static arrays 'alg_block_len[]' and 'alg_key_len[]'
respectively. Returning 'zero(0)' value from either of them would
likely lead to an error condition.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-27 13:37:14 +00:00
Li Qiang
0072d2a9fc crypto: fix leak in ivgen essiv init
On error path, the 'salt' doesn't been freed thus leading
a memory leak. This patch avoid this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2017-02-27 13:37:14 +00:00
Paolo Bonzini
ba3186c4e4 curl: do not use aio_context_acquire/release
Now that all bottom halves and callbacks take care of taking the
AioContext lock, we can migrate some users away from it and to a
specific QemuMutex or CoMutex.

Protect BDRVCURLState access with a QemuMutex.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170222180725.28611-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-27 13:33:24 +00:00
Paolo Bonzini
3b1d816984 tests-aio-multithread: use atomic_read properly
nodes[id].next is written by other threads.  If atomic_read is not used
(matching atomic_set in mcs_mutex_lock!) the compiler can optimize the
whole "if" away!

Reported-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Greg Kurz <groug@kaod.org>
Message-id: 20170227111726.9237-1-pbonzini@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-27 12:54:08 +00:00
Peter Maydell
d992f2f136 Merge remote-tracking branch 'remotes/artyom/tags/pull-sun4v-20170226' into staging
Pull request for Niagara patches 2017 02 26

# gpg: Signature made Sun 26 Feb 2017 21:56:06 GMT
# gpg:                using RSA key 0x3360C3F7411A125F
# gpg: Good signature from "Artyom Tarasenko <atar4qemu@gmail.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 2AD8 6149 17F4 B2D7 05C0  BB12 3360 C3F7 411A 125F

* remotes/artyom/tags/pull-sun4v-20170226:
  niagara: check if a serial port is available
  niagara: fail if a firmware file is missing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 22:40:23 +00:00
Artyom Tarasenko
a5a08302d4 niagara: check if a serial port is available
Reported-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
2017-02-26 22:46:08 +01:00
Artyom Tarasenko
5e3a549498 niagara: fail if a firmware file is missing
fail if a firmware file is missing and not qtest_enabled(),
the later is necessary to allow some basic tests if
firmware is not available

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Artyom Tarasenko <atar4qemu@gmail.com>
2017-02-26 22:44:25 +01:00
Peter Maydell
685783c5b6 Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into staging
slirp updates

# gpg: Signature made Sun 26 Feb 2017 14:40:00 GMT
# gpg:                using RSA key 0xB0A51BF58C9179C5
# gpg: Good signature from "Samuel Thibault <samuel.thibault@aquilenet.fr>"
# gpg:                 aka "Samuel Thibault <sthibault@debian.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@gnu.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@inria.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@labri.fr>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@ens-lyon.org>"
# gpg:                 aka "Samuel Thibault <samuel.thibault@u-bordeaux.fr>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 900C B024 B679 31D4 0F82  304B D017 8C76 7D06 9EE6
#      Subkey fingerprint: AEBF 7448 FAB9 453A 4552  390E B0A5 1BF5 8C91 79C5

* remotes/thibault/tags/samuel-thibault:
  slirp: tcp_listen(): Don't try to close() an fd we never opened
  slirp: Convert mbufs to use g_malloc() and g_free()
  slirp: Check qemu_socket() return value in udp_listen()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 16:38:40 +00:00
Peter Maydell
bd5d2353aa slirp: tcp_listen(): Don't try to close() an fd we never opened
Coverity points out (CID 1005725) that an error-exit path in tcp_listen()
will try to close(s) even if the reason it got there was that the
qemu_socket() failed and s was never opened.  Not only that, this isn't even
the right function to use, because we need closesocket() to do the right
thing on Windows.  Change to using the right function and only calling it if
needed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:39:29 +01:00
Peter Maydell
70f2e64e4d slirp: Convert mbufs to use g_malloc() and g_free()
The mbuf code currently doesn't check the result of doing a malloc()
or realloc() of its data (spotted by Coverity, CID 1238946).
Since the m_inc() API assumes that extending an mbuf must succeed,
just convert to g_malloc() and g_free().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:39:05 +01:00
Peter Maydell
4577b09a27 slirp: Check qemu_socket() return value in udp_listen()
Check the return value from qemu_socket() rather than trying to
pass it to bind() as an fd argument even if it's negative.
This wouldn't have caused any negative consequences, because
it won't be a valid fd number and the bind call will fail;
but Coverity complains (CID 1005723).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
2017-02-26 15:38:38 +01:00
Peter Maydell
6b4e463ff3 Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 24 Feb 2017 18:08:26 GMT
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  tests: Use opened block node for block job tests
  vvfat: Use opened node as backing file
  block: Add bdrv_new_open_driver()
  block: Factor out bdrv_open_driver()
  block: Use BlockBackend for image probing
  block: Factor out bdrv_open_child_bs()
  block: Attach bs->file only during .bdrv_open()
  block: Pass BdrvChild to bdrv_truncate()
  mirror: Resize active commit base in mirror_run()
  qcow2: Use BB for resizing in qcow2_amend_options()
  blockdev: Use BlockBackend to resize in qmp_block_resize()
  iotests: Fix another race in 030
  qemu-img: Improve documentation for PREALLOC_MODE_FALLOC
  qemu-img: Truncate before full preallocation
  qemu-img: Add tests for raw image preallocation
  qemu-img: Do not truncate before preallocation
  qemu-iotests: redirect nbd server stdout to /dev/null
  qemu-iotests: add ability to exclude certain protocols from tests
  qemu-iotests: Test 137 only supports 'file' protocol

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 12:26:37 +00:00
Peter Maydell
6528a4c1f2 Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Fri 24 Feb 2017 17:45:53 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  RBD: Add support readv,writev for rbd
  block/nfs: try to avoid the bounce buffer in pwritev
  block/nfs: convert to preadv / pwritev

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-26 11:47:00 +00:00
Peter Maydell
6d3f4c6d1d Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170224-2' into staging
MIPS patches 2017-02-24-2

CHanges:
* Add the Boston board with fixing the make check issue on 32-bit hosts.

# gpg: Signature made Fri 24 Feb 2017 11:43:45 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170224-2:
  hw/mips: MIPS Boston board support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 21:15:14 +00:00
Peter Maydell
28f997a82c Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-240217-1' into staging
This is the MTTCG pull-request as posted yesterday.

# gpg: Signature made Fri 24 Feb 2017 11:17:51 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-mttcg-240217-1: (24 commits)
  tcg: enable MTTCG by default for ARM on x86 hosts
  hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
  target-arm: ensure all cross vCPUs TLB flushes complete
  target-arm: don't generate WFE/YIELD calls for MTTCG
  target-arm/powerctl: defer cpu reset work to CPU context
  cputlb: introduce tlb_flush_*_all_cpus[_synced]
  cputlb: atomically update tlb fields used by tlb_reset_dirty
  cputlb: add tlb_flush_by_mmuidx async routines
  cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
  cputlb: introduce tlb_flush_* async work.
  cputlb: tweak qemu_ram_addr_from_host_nofail reporting
  cputlb: add assert_cpu_is_self checks
  tcg: handle EXCP_ATOMIC exception for system emulation
  tcg: enable thread-per-vCPU
  tcg: enable tb_lock() for SoftMMU
  tcg: remove global exit_request
  tcg: drop global lock during TCG code execution
  tcg: rename tcg_current_cpu to tcg_current_rr_cpu
  tcg: add kick timer for single-threaded vCPU emulation
  tcg: add options for enabling MTTCG
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 18:43:52 +00:00
Peter Maydell
2421f381dc Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20170224' into staging
A selection of s390x patches:
- cleanups, fixes and improvements
- program check loop detection (useful with the corresponding kernel
  patch)
- wire up virtio-crypto for ccw
- and finally support many virtqueues for virtio-ccw

# gpg: Signature made Fri 24 Feb 2017 09:19:19 GMT
# gpg:                using RSA key 0xDECF6B93C6F02FAF
# gpg: Good signature from "Cornelia Huck <huckc@linux.vnet.ibm.com>"
# gpg:                 aka "Cornelia Huck <cornelia.huck@de.ibm.com>"
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0  18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20170224:
  s390x/css: handle format-0 TIC CCW correctly
  s390x/arch_dump: pass cpuid into notes sections
  s390x/arch_dump: use proper note name and note size
  virtio-ccw: support VIRTIO_QUEUE_MAX virtqueues
  s390x: bump ADAPTER_ROUTES_MAX_GSI
  virtio-ccw: check flic->adapter_routes_max_batch
  s390x: add property adapter_routes_max_batch
  virtio-ccw: Check the number of vqs in CCW_CMD_SET_IND
  virtio-ccw: add virtio-crypto-ccw device
  virtio-ccw: handle virtio 1 only devices
  s390x/flic: fail migration on source already
  s390x/kvm: detect some program check loops
  s390x/s390-virtio: get rid of DPRINTF

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 17:48:49 +00:00
Peter Maydell
f62ab6bb8f Merge remote-tracking branch 'remotes/famz/tags/for-upstream' into staging
Docker testing and shippable patches

Hi Peter,

These are testing and build automation patches:

- Shippable.com powered CI config
- Docker cross build
- Fixes and MAINTAINERS tweaks.

# gpg: Signature made Fri 24 Feb 2017 06:31:10 GMT
# gpg:                using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021  AD56 CA35 624C 6A91 71C6

* remotes/famz/tags/for-upstream:
  docker: Install python2 explicitly in docker image
  MAINTAINERS: merge Build and test automation with Docker tests
  .shippable.yml: new CI provider
  new: debian docker targets for cross-compiling
  tests/docker: add basic user mapping support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-25 16:37:32 +00:00
Peter Maydell
d7941f4eed Merge remote-tracking branch 'remotes/armbru/tags/pull-util-2017-02-23' into staging
option cutils: Fix and clean up number conversions

# gpg: Signature made Thu 23 Feb 2017 19:41:17 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-util-2017-02-23: (24 commits)
  option: Fix checking of sizes for overflow and trailing crap
  util/cutils: Change qemu_strtosz*() from int64_t to uint64_t
  util/cutils: Return qemu_strtosz*() error and value separately
  util/cutils: Let qemu_strtosz*() optionally reject trailing crap
  qemu-img: Wrap cvtnum() around qemu_strtosz()
  test-cutils: Drop suffix from test_qemu_strtosz_simple()
  test-cutils: Use qemu_strtosz() more often
  util/cutils: Drop QEMU_STRTOSZ_DEFSUFFIX_* macros
  util/cutils: New qemu_strtosz()
  util/cutils: Rename qemu_strtosz() to qemu_strtosz_MiB()
  util/cutils: New qemu_strtosz_metric()
  test-cutils: Cover qemu_strtosz() around range limits
  test-cutils: Cover qemu_strtosz() with trailing crap
  test-cutils: Cover qemu_strtosz() invalid input
  test-cutils: Add missing qemu_strtosz()... endptr checks
  option: Fix to reject invalid and overflowing numbers
  util/cutils: Clean up control flow around qemu_strtol() a bit
  util/cutils: Clean up variable names around qemu_strtol()
  util/cutils: Rename qemu_strtoll(), qemu_strtoull()
  util/cutils: Rewrite documentation of qemu_strtol() & friends
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 18:34:27 +00:00
tianqing
1d393bdeae RBD: Add support readv,writev for rbd
Rbd can do readv and writev directly, so wo do not need to transform
iov to buf or vice versa any more.

Signed-off-by: tianqing <tianqing@unitedstack.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:43:01 -05:00
Peter Lieven
ef503a8417 block/nfs: try to avoid the bounce buffer in pwritev
if the passed qiov contains exactly one iov we can
pass the buffer directly.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-3-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:38:35 -05:00
Peter Lieven
69785a229d block/nfs: convert to preadv / pwritev
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-2-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-24 12:38:35 -05:00
Peter Maydell
6959e4523e Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170223.0' into staging
VFIO updates 2017-02-23

 - Report qdev_unplug errors (Alex Williamson)
 - Fix ecap ID 0 handling, improve comment (Alex Williamson)
 - Disable IGD stolen memory in UPT mode too (Xiong Zhang)

# gpg: Signature made Thu 23 Feb 2017 19:04:17 GMT
# gpg:                using RSA key 0x239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B  8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-updates-20170223.0:
  vfio/pci-quirks.c: Disable stolen memory for igd VFIO
  vfio/pci: Improve extended capability comments, skip masked caps
  vfio/pci: Report errors from qdev_unplug() via device request

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 17:27:59 +00:00
Kevin Wolf
d185cf0ec6 tests: Use opened block node for block job tests
blk_insert_bs() and block job related functions will soon require an
opened block node (permission calculations will involve the block
driver), so let our tests be consistent with the real users in this
respect.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
a8a4d15c1c vvfat: Use opened node as backing file
We should not try to assign a not yet opened node as the backing file,
because as soon as the permission system is added it will fail.  The
just added bdrv_new_open_driver() function is the right tool to open a
file with an internal driver, use it.

In case anyone wonders whether that magic fake backing file to trigger a
special action on 'commit' actually works today: No, not for me. One
reason is that we've been adding a raw format driver on top for several
years now and raw doesn't support commit. Other reasons include that the
backing file isn't writable and the driver doesn't support reopen, and
it's also size 0 and the driver doesn't support bdrv_truncate. All of
these are easily fixable, but then 'commit' ended up in an infinite loop
deep in the vvfat code for me, so I thought I'd best leave it alone. I'm
not really sure what it was supposed to do anyway.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
680c7f9606 block: Add bdrv_new_open_driver()
This function allows to create more or less normal BlockDriverStates
even for BlockDrivers that aren't globally registered (e.g. helper
filters for block jobs).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
01a5650179 block: Factor out bdrv_open_driver()
This is a function that doesn't do any option parsing, but just does
some basic BlockDriverState setup and calls the .bdrv_open() function of
the block driver.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
5696c6e350 block: Use BlockBackend for image probing
This fixes the use of a parent-less BdrvChild in bdrv_open_inherit() by
converting it into a BlockBackend. Which is exactly what it should be,
image probing is an external, standalone user of a node. The requests
can't be considered to originate from the format driver node because
that one isn't even opened yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
2d6b86af14 block: Factor out bdrv_open_child_bs()
This is the part of bdrv_open_child() that opens a BDS with option
inheritance, but doesn't attach it as a child to the parent yet.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
4e4bf5c42c block: Attach bs->file only during .bdrv_open()
The way that attaching bs->file worked was a bit unusual in that it was
the only child that would be attached to a node which is not opened yet.
Because of this, the block layer couldn't know yet which permissions the
driver would eventually need.

This patch moves the point where bs->file is attached to the beginning
of the individual .bdrv_open() implementations, so drivers already know
what they are going to do with the child. This is also more consistent
with how driver-specific children work.

For a moment, bdrv_open() gets its own BdrvChild to perform image
probing, but instead of directly assigning this BdrvChild to the BDS, it
becomes a temporary one and the node name is passed as an option to the
drivers, so that they can simply use bdrv_open_child() to create another
reference for their own use.

This duplicated child for (the not opened yet) bs is not the final
state, a follow-up patch will change the image probing code to use a
BlockBackend, which is completely independent of bs.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
52cdbc5869 block: Pass BdrvChild to bdrv_truncate()
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
becc347e1c mirror: Resize active commit base in mirror_run()
This is more consistent with the commit block job, and it moves the code
to a place where we already have the necessary BlockBackends to resize
the base image when bdrv_truncate() is changed to require a BdrvChild.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
70b27f3643 qcow2: Use BB for resizing in qcow2_amend_options()
In order to able to convert bdrv_truncate() to take a BdrvChild and
later to correctly check the resize permission here, we need to use a
BlockBackend for resizing the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
Kevin Wolf
7dad9ee646 blockdev: Use BlockBackend to resize in qmp_block_resize()
In order to be able to do permission checking and to keep working with
the BdrvChild based bdrv_truncate() that this involves, we need to
create a temporary BlockBackend to resize the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
2017-02-24 16:09:23 +01:00
John Snow
2c3b44da07 iotests: Fix another race in 030
We can't rely on a non-paused job to be present and running for us.
Assume that if the job is not present that it completed already.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
c6ccc2c5e6 qemu-img: Improve documentation for PREALLOC_MODE_FALLOC
Now that we are truncating the file in both PREALLOC_MODE_FULL and
PREALLOC_MODE_OFF, not truncating in PREALLOC_MODE_FALLOC looks odd.
Add a comment explaining why we do not truncate in this case.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
5a1dad9d5a qemu-img: Truncate before full preallocation
In a previous commit (qemu-img: Do not truncate before preallocation) we
moved truncate to the PREALLOC_MODE_OFF branch to avoid slowdown in
posix_fallocate().

However this change is not optimal when using PREALLOC_MODE_FULL, since
knowing the final size from the beginning could allow the file system
driver to do less allocations and possibly avoid fragmentation of the
file.

Now we truncate also before doing full preallocation.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:23 +01:00
Nir Soffer
6f993f3fca qemu-img: Add tests for raw image preallocation
Add tests for creating raw image with and without the preallocation
option.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Nir Soffer
f6a7240442 qemu-img: Do not truncate before preallocation
When using file system that does not support fallocate() (e.g. NFS <
4.2), truncating the file only when preallocation=OFF speeds up creating
raw file.

Here is example run, tested on Fedora 24 machine, creating raw file on
NFS version 3 server.

$ time ./qemu-img-master create -f raw -o preallocation=falloc mnt/test 1g
Formatting 'mnt/test', fmt=raw size=1073741824 preallocation=falloc

real	0m21.185s
user	0m0.022s
sys	0m0.574s

$ time ./qemu-img-fix create -f raw -o preallocation=falloc mnt/test 1g
Formatting 'mnt/test', fmt=raw size=1073741824 preallocation=falloc

real	0m11.601s
user	0m0.016s
sys	0m0.525s

$ time dd if=/dev/zero of=mnt/test bs=1M count=1024 oflag=direct
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 15.6627 s, 68.6 MB/s

real	0m16.104s
user	0m0.009s
sys	0m0.220s

Running with strace we can see that without this change we do one
pread() and one pwrite() for each block. With this change, we do only
one pwrite() per block.

$ strace ./qemu-img-master create -f raw -o preallocation=falloc mnt/test 8192
...
pread64(9, "\0", 1, 4095)               = 1
pwrite64(9, "\0", 1, 4095)              = 1
pread64(9, "\0", 1, 8191)               = 1
pwrite64(9, "\0", 1, 8191)              = 1

$ strace ./qemu-img-fix create -f raw -o preallocation=falloc mnt/test 8192
...
pwrite64(9, "\0", 1, 4095)              = 1
pwrite64(9, "\0", 1, 8191)              = 1

This happens because posix_fallocate is checking if each block is
allocated before writing a byte to the block, and when truncating the
file before preallocation, all blocks are unallocated.

Signed-off-by: Nir Soffer <nirsof@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
43421ea05f qemu-iotests: redirect nbd server stdout to /dev/null
Some iotests (e.g. 174) try to filter the output of _make_test_image by
piping the stdout.  Pipe the server stdout to /dev/null, so that filter
pipe does not need to wait until process completion.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
dfac03dcd4 qemu-iotests: add ability to exclude certain protocols from tests
Add the ability for shell script tests to exclude specific
protocols.  This is useful to allow all protocols except ones known to
not support a feature used in the test (e.g. .bdrv_create).

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Jeff Cody
2b12baf0e3 qemu-iotests: Test 137 only supports 'file' protocol
Since test 137 make uses of qcow2.py, only local files are supported.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2017-02-24 16:09:22 +01:00
Peter Maydell
fe8ee082db Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-22' into staging
QAPI patches for 2017-02-22

# gpg: Signature made Wed 22 Feb 2017 19:12:27 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-22:
  block: Don't bother asserting type of output visitor's output
  monitor: Clean up handle_hmp_command() a bit
  tests: Don't check qobject_type() before qobject_to_qbool()
  tests: Don't check qobject_type() before qobject_to_qfloat()
  tests: Don't check qobject_type() before qobject_to_qint()
  tests: Don't check qobject_type() before qobject_to_qstring()
  tests: Don't check qobject_type() before qobject_to_qlist()
  Don't check qobject_type() before qobject_to_qdict()
  test-qmp-event: Simplify and tighten event_test_emit()
  libqtest: Clean up qmp_response() a bit
  check-qjson: Simplify around compare_litqobj_to_qobj()
  check-qdict: Tighten qdict_crumple_test_recursive() some
  check-qdict: Simplify qdict_crumple_test_recursive()
  qdict: Make qdict_get_qlist() safe like qdict_get_qdict()
  net: Flatten simple union NetLegacyOptions
  numa: Flatten simple union NumaOptions

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 15:00:51 +00:00
Peter Maydell
63f495beb4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-cve-2017-2620-20170224-1' into staging
cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

# gpg: Signature made Fri 24 Feb 2017 13:42:39 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-cve-2017-2620-20170224-1:
  cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 13:55:26 +00:00
Gerd Hoffmann
92f2b88cea cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)
CIRRUS_BLTMODE_MEMSYSSRC blits do NOT check blit destination
and blit width, at all.  Oops.  Fix it.

Security impact: high.

The missing blit destination check allows to write to host memory.
Basically same as CVE-2014-8106 for the other blit variants.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-24 14:35:50 +01:00
Peter Maydell
5842b55fd4 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170223-1' into staging
usb: ohci bugfix, switch core to unrealize, xhci property cleanup

# gpg: Signature made Thu 23 Feb 2017 15:37:57 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170223-1:
  xhci: properties cleanup
  usb: ohci: fix error return code in servicing td
  usb: replace handle_destroy with unrealize

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 12:49:04 +00:00
Paul Burton
df1d8a1f29 hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate it's enough
for typical stock Boston software (eg. Linux kernels) to work with hard
disks given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly.
  changed default memory size into 1G due to make check failure
  on 32-bit hosts]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-24 10:37:21 +00:00
Alex Bennée
ca759f9e38 tcg: enable MTTCG by default for ARM on x86 hosts
This enables the multi-threaded system emulation by default for ARMv7
and ARMv8 guests using the x86_64 TCG backend. This is because on the
guest side:

  - The ARM translate.c/translate-64.c have been converted to
    - use MTTCG safe atomic primitives
    - emit the appropriate barrier ops
  - The ARM machine has been updated to
    - hold the BQL when modifying shared cross-vCPU state
    - defer powerctl changes to async safe work

All the host backends support the barrier and atomic primitives but
need to provide same-or-better support for normal load/store
operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Acked-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Pranith Kumar <bobby.prani@gmail.com>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:46 +00:00
Alex Bennée
4881658a4b hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
The arm_reset_cpu/set_cpu_on/set_cpu_off() functions do their work
asynchronously in the target vCPUs context. As a result we need to
ensure the SRC_SCR reset bits correctly report the reset status at the
right time. To do this we defer the clearing of the bit with an async
job which will run after the work queued by ARM powerctl functions.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
a67cf27727 target-arm: ensure all cross vCPUs TLB flushes complete
Previously flushes on other vCPUs would only get serviced when they
exited their TranslationBlocks. While this isn't overly problematic it
violates the semantics of TLB flush from the point of view of source
vCPU.

To solve this we call the cputlb *_all_cpus_synced() functions to do
the flushes which ensures all flushes are completed by the time the
vCPU next schedules its own work. As the TLB instructions are modelled
as CP writes the TB ends at this point meaning cpu->exit_request will
be checked before the next instruction is executed.

Deferring the work until the architectural sync point is a possible
future optimisation.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
c22edfebff target-arm: don't generate WFE/YIELD calls for MTTCG
The WFE and YIELD instructions are really only hints and in TCG's case
they were useful to move the scheduling on from one vCPU to the next. In
the parallel context (MTTCG) this just causes an unnecessary cpu_exit
and contention of the BQL.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
062ba099e0 target-arm/powerctl: defer cpu reset work to CPU context
When switching a new vCPU on we want to complete a bunch of the setup
work before we start scheduling the vCPU thread. To do this cleanly we
defer vCPU setup to async work which will run the vCPUs execution
context as the thread is woken up. The scheduling of the work will kick
the vCPU awake.

This avoids potential races in MTTCG system emulation.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
Alex Bennée
c3b9a07a33 cputlb: introduce tlb_flush_*_all_cpus[_synced]
This introduces support to the cputlb API for flushing all CPUs TLBs
with one call. This avoids the need for target helpers to iterate
through the vCPUs themselves.

An additional variant of the API (_synced) will cause the source vCPUs
work to be scheduled as "safe work". The result will be all the flush
operations will be complete by the time the originating vCPU executes
its safe work. The calling implementation can either end the TB
straight away (which will then pick up the cpu->exit_request on
entering the next block) or defer the exit until the architectural
sync point (usually a barrier instruction).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
b0706b7167 cputlb: atomically update tlb fields used by tlb_reset_dirty
The main use case for tlb_reset_dirty is to set the TLB_NOTDIRTY flags
in TLB entries to force the slow-path on writes. This is used to mark
page ranges containing code which has been translated so it can be
invalidated if written to. To do this safely we need to ensure the TLB
entries in question for all vCPUs are updated before we attempt to run
the code otherwise a race could be introduced.

To achieve this we atomically set the flag in tlb_reset_dirty_range and
take care when setting it when the TLB entry is filled.

On 32 bit systems attempting to emulate 64 bit guests we don't even
bother as we might not have the atomic primitives available. MTTCG is
disabled in this case and can't be forced on. The copy_tlb_helper
function helps keep the atomic semantics in one place to avoid
confusion.

The dirty helper function is made static as it isn't used outside of
cputlb.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
e72184455c cputlb: add tlb_flush_by_mmuidx async routines
This converts the remaining TLB flush routines to use async work when
detecting a cross-vCPU flush. The only minor complication is having to
serialise the var_list of MMU indexes into a form that can be punted
to an asynchronous job.

The pending_tlb_flush field on QOM's CPU structure also becomes a
bitfield rather than a boolean.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
0336cbf853 cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
While the vargs approach was flexible the original MTTCG ended up
having munge the bits to a bitmap so the data could be used in
deferred work helpers. Instead of hiding that in cputlb we push the
change to the API to make it take a bitmap of MMU indexes instead.

For ARM some the resulting flushes end up being quite long so to aid
readability I've tended to move the index shifting to a new line so
all the bits being or-ed together line up nicely, for example:

    tlb_flush_page_by_mmuidx(other_cs, pageaddr,
                             (1 << ARMMMUIdx_S1SE1) |
                             (1 << ARMMMUIdx_S1SE0));

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
[AT: SPARC parts only]
Reviewed-by: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
[PM: ARM parts only]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:46 +00:00
KONRAD Frederic
e3b9ca8109 cputlb: introduce tlb_flush_* async work.
Some architectures allow to flush the tlb of other VCPUs. This is not a problem
when we have only one thread for all VCPUs but it definitely needs to be an
asynchronous work when we are in true multithreaded work.

We take the tb_lock() when doing this to avoid racing with other threads
which may be invalidating TB's at the same time. The alternative would
be to use proper atomic primitives to clear the tlb entries en-mass.

This patch doesn't do anything to protect other cputlb function being
called in MTTCG mode making cross vCPU changes.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: remove need for g_malloc on defer, make check fixes, tb_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
857baec1d9 cputlb: tweak qemu_ram_addr_from_host_nofail reporting
This moves the helper function closer to where it is called and updates
the error message to report via error_report instead of the deprecated
fprintf.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Alex Bennée
f0aff0f124 cputlb: add assert_cpu_is_self checks
For SoftMMU the TLB flushes are an example of a task that can be
triggered on one vCPU by another. To deal with this properly we need to
use safe work to ensure these changes are done safely. The new assert
can be enabled while debugging to catch these cases.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:46 +00:00
Pranith Kumar
08e73c48b0 tcg: handle EXCP_ATOMIC exception for system emulation
The patch enables handling atomic code in the guest. This should be
preferably done in cpu_handle_exception(), but the current assumptions
regarding when we can execute atomic sections cause a deadlock.

The current mechanism discards the flags which were set in atomic
execution. We ensure they are properly saved by calling the
cc->cpu_exec_enter/leave() functions around the loop.

As we are running cpu_exec_step_atomic() from the outermost loop we
need to avoid an abort() when single stepping over atomic code since
debug exception longjmp will point to the the setlongjmp in
cpu_exec(). We do this by setting a new jmp_env so that it jumps back
here on an exception.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
[AJB: tweak title, merge with new patches, add mmap_lock]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
CC: Paolo Bonzini <pbonzini@redhat.com>
2017-02-24 10:32:45 +00:00
Alex Bennée
372579427a tcg: enable thread-per-vCPU
There are a couple of changes that occur at the same time here:

  - introduce a single vCPU qemu_tcg_cpu_thread_fn

  One of these is spawned per vCPU with its own Thread and Condition
  variables. qemu_tcg_rr_cpu_thread_fn is the new name for the old
  single threaded function.

  - the TLS current_cpu variable is now live for the lifetime of MTTCG
    vCPU threads. This is for future work where async jobs need to know
    the vCPU context they are operating in.

The user to switch on multi-thread behaviour and spawn a thread
per-vCPU. For a simple test kvm-unit-test like:

  ./arm/run ./arm/locking-test.flat -smp 4 -accel tcg,thread=multi

Will now use 4 vCPU threads and have an expected FAIL (instead of the
unexpected PASS) as the default mode of the test has no protection when
incrementing a shared variable.

We enable the parallel_cpus flag to ensure we generate correct barrier
and atomic code if supported by the front and backends. This doesn't
automatically enable MTTCG until default_mttcg_enabled() is updated to
check the configuration is supported.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[AJB: Some fixes, conditionally, commit rewording]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
2f16960660 tcg: enable tb_lock() for SoftMMU
tb_lock() has long been used for linux-user mode to protect code
generation. By enabling it now we prepare for MTTCG and ensure all code
generation is serialised by this lock. The other major structure that
needs protecting is the l1_map and its PageDesc structures. For the
SoftMMU case we also use tb_lock() to protect these structures instead
of linux-user mmap_lock() which as the name suggests serialises updates
to the structure as a result of guest mmap operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
e5143e30fb tcg: remove global exit_request
There are now only two uses of the global exit_request left.

The first ensures we exit the run_loop when we first start to process
pending work and in the kick handler. This is just as easily done by
setting the first_cpu->exit_request flag.

The second use is in the round robin kick routine. The global
exit_request ensured every vCPU would set its local exit_request and
cause a full exit of the loop. Now the iothread isn't being held while
running we can just rely on the kick handler to push us out as intended.

We lightly re-factor the main vCPU thread to ensure cpu->exit_requests
cause us to exit the main loop and process any IO requests that might
come along. As an cpu->exit_request may legitimately get squashed
while processing the EXCP_INTERRUPT exception we also check
cpu->queued_work_first to ensure queued work is expedited as soon as
possible.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Jan Kiszka
8d04fb55de tcg: drop global lock during TCG code execution
This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.

We have to revert a few optimization for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.

Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here.
These numbers demonstrate where we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independent on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully
loading a host CPU.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com>
[FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex]
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[EGC: fixed iothread lock for cpu-exec IRQ handling]
Signed-off-by: Emilio G. Cota <cota@braap.org>
[AJB: -smp single-threaded fix, clean commit msg, BQL fixes]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
[PM: target-arm changes]
Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:32:45 +00:00
Alex Bennée
791158d93b tcg: rename tcg_current_cpu to tcg_current_rr_cpu
..and make the definition local to cpus. In preparation for MTTCG the
concept of a global tcg_current_cpu will no longer make sense. However
we still need to keep track of it in the single-threaded case to be able
to exit quickly when required.

qemu_cpu_kick_no_halt() moves and becomes qemu_cpu_kick_rr_cpu() to
emphasise its use-case. qemu_cpu_kick now kicks the relevant cpu as
well as qemu_kick_rr_cpu() which will become a no-op in MTTCG.

For the time being the setting of the global exit_request remains.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:45 +00:00
Alex Bennée
6546706d28 tcg: add kick timer for single-threaded vCPU emulation
Currently we rely on the side effect of the main loop grabbing the
iothread_mutex to give any long running basic block chains a kick to
ensure the next vCPU is scheduled. As this code is being re-factored and
rationalised we now do it explicitly here.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
2017-02-24 10:32:45 +00:00
KONRAD Frederic
8d4e9146b3 tcg: add options for enabling MTTCG
We know there will be cases where MTTCG won't work until additional work
is done in the front/back ends to support. It will however be useful to
be able to turn it on.

As a result MTTCG will default to off unless the combination is
supported. However the user can turn it on for the sake of testing.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
[AJB: move to -accel tcg,thread=multi|single, defaults]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
2093714314 tcg: move TCG_MO/BAR types into own file
We'll be using the memory ordering definitions to define values for
both the host and guest. To avoid fighting with circular header
dependencies just move these types into their own minimal header.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Pranith Kumar
4ec667042d mttcg: Add missing tb_lock/unlock() in cpu_exec_step()
The recent patch enabling lock assertions uncovered the missing lock
acquisition in cpu_exec_step(). This patch adds them.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Pranith Kumar
6ac3d7e845 mttcg: translate-all: Enable locking debug in a debug build
Enable tcg lock debug asserts in a debug build by default instead of
relying on DEBUG_LOCKING. None of the other DEBUG_* macros have
asserts, so this patch removes DEBUG_LOCKING and enable these asserts
in a debug build.

CC: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
[AJB: tweak ifdefs so can be early in series]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Alex Bennée
c6489dd921 docs: new design document multi-thread-tcg.txt
This documents the current design for upgrading TCG emulation to take
advantage of modern CPUs by running a thread-per-CPU. The document goes
through the various areas of the code affected by such a change and
proposes design requirements for each part of the solution.

The text marked with (Current solution[s]) to document what the current
approaches being used are.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
2017-02-24 10:32:45 +00:00
Peter Maydell
5522924718 Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.9-20170222' into staging
ppc patch queue for 2017-02-22

This pull request has:
   * Yet more POWER9 instruction implementations
   * Some extensions to the softfloat code which are necesssary for
     some of those instructions
   * Some preliminary patches in preparation for POWER9 softmmu
     implementation
   * Igor Mammedov's cleanups to unify hotplug cpu handling across
     architectures
   * Assorted bugfixes

The softfloat and cpu hotplug changes aren't entirely ppc specific (in
fact the hotplug stuff contains some pc specific patches).  However
they're included here because ppc is one of the main beneficiaries,
and the series depend on some ppc specific patches.

# gpg: Signature made Wed 22 Feb 2017 06:29:47 GMT
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>"
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.9-20170222: (43 commits)
  hw/ppc/ppc405_uc.c: Avoid integer overflows
  hw/ppc/spapr: Check for valid page size when hot plugging memory
  target-ppc: fix Book-E TLB matching
  hw/net/spapr_llan: 6 byte mac address device tree entry
  machine: replace query_hotpluggable_cpus() callback with has_hotpluggable_cpus flag
  machine: unify [pc_|spapr_]query_hotpluggable_cpus() callbacks
  spapr: reuse machine->possible_cpus instead of cores[]
  change CPUArchId.cpu type to Object*
  pc: pass apic_id to pc_find_cpu_slot() directly so lookup could be done without CPU object
  pc: calculate topology only once when possible_cpus is initialised
  pc: move pcms->possible_cpus init out of pc_cpus_init()
  machine: move possible_cpus to MachineState
  hw/pci-host/prep: Do not use hw_error() in realize function
  target/ppc/POWER9: Direct all instr and data storage interrupts to the hypv
  target/ppc/POWER9: Adapt LPCR handling for POWER9
  target/ppc/POWER9: Add ISAv3.00 MMU definition
  target/ppc: Fix LPCR DPFD mask define
  target-ppc: Add xscvqpudz and xscvqpuwz instructions
  target-ppc: Implement round to odd variants of quad FP instructions
  softfloat: Add float128_to_uint32_round_to_zero()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-24 10:13:57 +00:00
Dong Jia Shi
9f94f84ce7 s390x/css: handle format-0 TIC CCW correctly
For TIC CCW, bit positions 8-32 of the format-1 CCW must contain zeros;
otherwise, a program-check condition is generated. For format-0 TIC CCWs,
bits 32-63 are ignored.

To convert TIC from format-0 CCW to format-1 CCW correctly, let's clear
bits 8-32 to guarantee compatibility.

Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
f738f296ea s390x/arch_dump: pass cpuid into notes sections
we need to pass the cpuid into the pid field of the notes
section, otherwise the notes for different CPUs all have 0:

e.g. objdump -h shows:
old:
  5 .reg-s390-prefix/0 00000004  0000000000000000  0000000000000000
  6 .reg-s390-prefix 00000004  0000000000000000  0000000000000000
 21 .reg-s390-prefix/0 00000004  0000000000000000  0000000000000000
new:
  5 .reg-s390-prefix/1 00000004  0000000000000000  0000000000000000
  6 .reg-s390-prefix 00000004  0000000000000000  0000000000000000
 21 .reg-s390-prefix/2 00000004  0000000000000000  0000000000000000

Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
5f706fdc16 s390x/arch_dump: use proper note name and note size
In binutils/libbfd (bfd/elf.c) it is enforced that all s390
specific ELF notes like e.g. NT_S390_PREFIX or NT_S390_CTRS
have "LINUX" specified as note name and that the namesz is
6. Otherwise the notes are ignored.

QEMU currently uses "CORE" for these notes. Up to now this has
not been a real problem because the dump analysis tool "crash"
does handle that. But it will break all programs that use libbfd
for processing ELF notes.

So fix this and use "LINUX" for all s390 specific notes to comply
with libbfd. Also set the correct namesz.

Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
b1914b824a virtio-ccw: support VIRTIO_QUEUE_MAX virtqueues
The maximal number of virtqueues per device can be limited on a per
transport basis. For virtio-ccw this limit is defined by
VIRTIO_CCW_QUEUE_MAX, however the limitation used to come form the
number of adapter routes supported by flic (via notifiers).

Recently the limitation of the flic was adjusted so that it can
accommodate VIRTIO_QUEUE_MAX queues, and is in the meanwhile checked for
separately too.

Let us remove the transport specific limitation of virtio-ccw by
dropping VIRTIO_CCW_QUEUE_MAX and using VIRTIO_QUEUE_MAX instead.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
069097dad3 s390x: bump ADAPTER_ROUTES_MAX_GSI
Let's increase ADAPTER_ROUTES_MAX_GSI to VIRTIO_QUEUE_MAX which is the
largest demand foreseeable at the moment. Let us add a compatibility
macro for the previous machines so client code can maintain backwards
migration compatibility

To not mess up migration compatibility for virtio-ccw
VIRTIO_CCW_QUEUE_MAX is left at it's current value, and will be dropped
when virtio-ccw is converted to use the capability of the flic
introduced by this patch.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
0708afa704 virtio-ccw: check flic->adapter_routes_max_batch
Currently VIRTIO_CCW_QUEUE_MAX is defined as ADAPTER_ROUTES_MAX_GSI.
That is when checking queue max we implicitly check the constraint
concerning the number of adapter routes. This won't be satisfactory any
more (due to backward migration considerations) if ADAPTER_ROUTES_MAX_GSI
changes (ADAPTER_ROUTES_MAX_GSI is going to change because we want to
support up to VIRTIO_QUEUE_MAX queues per virtio-ccw device).

Let us introduce a check on a recently introduce flic property which
gives us the compatibility machine aware limit on adapter routes.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
e61cc6b5c6 s390x: add property adapter_routes_max_batch
To make virtio-ccw supports more that  64 virtqueues we will have to
increase ADAPTER_ROUTES_MAX_GSI which is currently limiting the number if
possible adapter routes. Of course increasing the number of supported
routes can break backwards migration.

Let us introduce a compatibility property adapter_routes_max_batch so
client code can use the some old limit if in compatibility mode and
retain the migration compatibility.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
797b608638 virtio-ccw: Check the number of vqs in CCW_CMD_SET_IND
We cannot support more than 64 virtqueues with the 64 bits provided by
classic indicators. If a driver tries to setup classic indicators
(which it is free to do even for virtio-1 devices) for a device with
more than 64 virtqueues, we should reject the attempt so that the
driver does not end up with an unusable device.

This is in preparation for bumping the number of supported virtqueues
on the ccw transport.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
d2256070d2 virtio-ccw: add virtio-crypto-ccw device
Wire up virtio-crypto for the CCW based VIRTIO.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Halil Pasic
47e13dfd86 virtio-ccw: handle virtio 1 only devices
As a preparation for wiring-up virtio-crypto, the first non-transitional
virtio device on the ccw transport, let us introduce a mechanism for
disabling revision 0.  This is more or less equivalent with disabling
legacy as revision 0 is legacy only, and legacy drivers use the revision
0 exclusively.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Cornelia Huck
ba690c7171 s390x/flic: fail migration on source already
Current code puts a 'FLIC_FAILED' marker into the migration stream
to indicate something went wrong while saving flic state and fails
load if it encounters that marker. VMState's put routine recently
gained the ability to return error codes (but did not wire it up
yet).

In order to be able to reap the benefits of returning an error and
failing migration on the source already once this gets wired up
in core, return an error in addition to storing 'FLIC_FAILED'.

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
2017-02-24 10:15:18 +01:00
Christian Borntraeger
409422cd83 s390x/kvm: detect some program check loops
Sometimes (e.g. early boot) a guest is broken in such ways that it loops
100% delivering operation exceptions (illegal operation) but the pgm new
PSW is not set properly. This will result in code being read from
address zero, which usually contains another illegal op. Let's detect
this case and put the guest in crashed state. Instead of only detecting
this for address zero apply a heuristic that will work for any program
check new psw so that it will also reach the crashed state if you
provide some random elf file to the -kernel option.
We do not want guest problem state to be able to trigger a guest panic,
e.g. by faulting on an address that is the same as the program check
new PSW, so we check for the problem state bit being off.

With this we
a: get rid of CPU consumption of such broken guests
b: keep the program old PSW. This allows to find out the original illegal
   operation - making debugging such early boot issues much easier than
   with single stepping

This relies on the kernel using a similar heuristic and passing such
operation exceptions to user space.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
2017-02-24 10:15:18 +01:00
Cornelia Huck
94b5024b1f s390x/s390-virtio: get rid of DPRINTF
The DPRINTF approach is likely to introduce bitrot, and the preferred
way for debugging is tracing anyway. Fortunately, there are no users
(left), so nuke it.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-02-24 10:15:18 +01:00
Fam Zheng
a8f159d45b docker: Install python2 explicitly in docker image
Python is no longer installed implicitly, but the QEMU build system
requires it. List it in PACKAGES.

Reported-by: Auger Eric <eric.auger@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170222021801.28658-1-famz@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
e70dc7f854 MAINTAINERS: merge Build and test automation with Docker tests
The docker framework is really just another piece in the build
automation puzzle so lets merge it together. For added bonus I've also
included the Travis and Patchew status links. The Shippable links will
be added later once mainline tests have been configured and setup.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170220105139.21581-5-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
d92d886a3b .shippable.yml: new CI provider
Ostensibly Shippable offers a similar set of services as Travis.
However they are focused on Docker container based work-flows so we
can use our existing containers to run a few extra builds - in this
case a bunch of cross-compiled targets on a Debian multiarch system.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-Id: <20170220105139.21581-4-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
24e0131f37 new: debian docker targets for cross-compiling
This provides a basic Debian install with access to the emdebian cross
compilers. The debian-armhf-cross and debian-arm64-cross targets build
on the basic Debian image to allow cross compiling to those targets.

A new environment variable (QEMU_CONFIGURE_OPTS) is set as part of the
docker container and passed to the build to specify the
--cross-prefix. The user still calls the build in the usual way, for
example:

  make docker-test-build@debian-arm64-cross \
    TARGET_LIST="aarch64-softmmu,aarch64-linux-user"

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170220105139.21581-3-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Alex Bennée
414a8ce57e tests/docker: add basic user mapping support
Currently all docker builds are done by exporting a tarball to the
docker container and running the build as the containers root user.
Other use cases are possible however and it is possible to map a part
of users file-system to the container. This is useful for example for
doing cross-builds of arbitrary source trees. For this to work
smoothly the container needs to have a user created that maps cleanly
to the host system.

This adds a -u option to the docker script so that:

  DEB_ARCH=armhf DEB_TYPE=stable ./tests/docker/docker.py build \
    -u --include-executable=arm-linux-user/qemu-arm \
    debian:armhf ./tests/docker/dockerfiles/debian-bootstrap.docker

Will build a container that can then be run like:

  docker run --rm -it -v /home/alex/lsrc/qemu/risu.git/:/src \
    --user=alex:alex -w /src/ debian:armhf \
    sh -c "make clean && ./configure -s && make"

All docker containers built will add the current user unless
explicitly disabled by specifying NOUSER when invoking the Makefile:

  make docker-image-debian-armhf-cross NOUSER=1

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20170220105139.21581-2-alex.bennee@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
2017-02-24 14:18:11 +08:00
Markus Armbruster
75cdcd1553 option: Fix checking of sizes for overflow and trailing crap
parse_option_size()'s checking for overflow and trailing crap is
wrong.  Has always been that way.  qemu_strtosz() gets it right, so
use that.

This adds support for size suffixes 'P', 'E', and ignores case for all
suffixes, not just 'k'.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-25-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
f46bfdbfc8 util/cutils: Change qemu_strtosz*() from int64_t to uint64_t
This will permit its use in parse_option_size().

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-24-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
f17fd4fdf0 util/cutils: Return qemu_strtosz*() error and value separately
This makes qemu_strtosz(), qemu_strtosz_mebi() and
qemu_strtosz_metric() similar to qemu_strtoi64(), except negative
values are rejected.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-23-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
4fcdf65ae2 util/cutils: Let qemu_strtosz*() optionally reject trailing crap
Change the qemu_strtosz() & friends to return -EINVAL when @endptr is
null and the conversion doesn't consume the string completely.
Matches how qemu_strtol() & friends work.

Only test_qemu_strtosz_simple() passes a null @endptr.  No functional
change there, because its conversion consumes the string.

Simplify callers that use @endptr only to fail when it doesn't point
to '\0' to pass a null @endptr instead.

Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com> (maintainer:X86)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: Max Reitz <mreitz@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:Block layer core)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1487708048-2131-22-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
606caa0a2a qemu-img: Wrap cvtnum() around qemu_strtosz()
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-21-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
dab9cc9237 test-cutils: Drop suffix from test_qemu_strtosz_simple()
Leave testing unit suffixes to test_qemu_strtosz_units().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-20-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
753f8da0e0 test-cutils: Use qemu_strtosz() more often
Use qemu_strtosz() instead of qemu_strtosz_MiB() where it doesn't
really make a difference.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-19-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
17f942560e util/cutils: Drop QEMU_STRTOSZ_DEFSUFFIX_* macros
Writing QEMU_STRTOSZ_DEFSUFFIX_* instead of '*' gains nothing.  Get
rid of these eyesores.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-18-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
466dea14e6 util/cutils: New qemu_strtosz()
Most callers of qemu_strtosz_suffix() pass QEMU_STRTOSZ_DEFSUFFIX_B.
Capture the pattern in new qemu_strtosz().

Inline qemu_strtosz_suffix() into its only remaining caller.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-17-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
e591591b32 util/cutils: Rename qemu_strtosz() to qemu_strtosz_MiB()
With qemu_strtosz(), no suffix means mebibytes.  It's used rarely.
I'm going to add a similar function where no suffix means bytes.
Rename qemu_strtosz() to qemu_strtosz_MiB() to make the name
qemu_strtosz() available for the new function.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-16-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
d2734d2629 util/cutils: New qemu_strtosz_metric()
To parse numbers with metric suffixes, we use

    qemu_strtosz_suffix_unit(nptr, &eptr, QEMU_STRTOSZ_DEFSUFFIX_B, 1000)

Capture this in a new function for legibility:

    qemu_strtosz_metric(nptr, &eptr)

Replace test_qemu_strtosz_suffix_unit() by test_qemu_strtosz_metric().

Rename qemu_strtosz_suffix_unit() to do_strtosz() and give it internal
linkage.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-15-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
0b742797aa test-cutils: Cover qemu_strtosz() around range limits
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-14-git-send-email-armbru@redhat.com>
2017-02-23 20:35:36 +01:00
Markus Armbruster
a6b4373fa2 test-cutils: Cover qemu_strtosz() with trailing crap
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-13-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
18aec47967 test-cutils: Cover qemu_strtosz() invalid input
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-12-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
019144b286 test-cutils: Add missing qemu_strtosz()... endptr checks
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-11-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
3403e5eb88 option: Fix to reject invalid and overflowing numbers
parse_option_number() fails to check for these errors after
strtoull().  Has always been broken.  Fix that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-10-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
4baef2679e util/cutils: Clean up control flow around qemu_strtol() a bit
Reorder check_strtox_error() to make it obvious that we always store
through a non-null @endptr.

Transform

    if (some error) {
        error case ...
        err = value for error case;
    } else {
        normal case ...
        err = value for normal case;
    }
    return err;

to

    if (some error) {
        error case ...
        return value for error case;
    }
    normal case ...
    return value for normal case;

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-9-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
717adf9609 util/cutils: Clean up variable names around qemu_strtol()
Name same things the same, different things differently.

* qemu_strtol()'s parameter @nptr is called @p in
  check_strtox_error().  Rename the latter.

* qemu_strtol()'s parameter @endptr is called @next in
  check_strtox_error().  Rename the latter.

* qemu_strtol()'s variable @p is called @endptr in
  check_strtox_error().  Rename both to @ep.

* qemu_strtol()'s variable @err is *negative* errno,
  check_strtox_error()'s parameter @err is *positive*.  Rename the
  latter to @libc_errno.

Same for qemu_strtoul(), qemu_strtoi64(), qemu_strtou64(), of course.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-8-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
b30d188677 util/cutils: Rename qemu_strtoll(), qemu_strtoull()
The name qemu_strtoll() suggests conversion to long long, but it
actually converts to int64_t.  Rename to qemu_strtoi64().

The name qemu_strtoull() suggests conversion to unsigned long long,
but it actually converts to uint64_t.  Rename to qemu_strtou64().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-7-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
4295f879be util/cutils: Rewrite documentation of qemu_strtol() & friends
Fixes the following documentation bugs:

* Fails to document that null @nptr is safe.

* Fails to document that we return -EINVAL when no conversion could be
  performed (commit 47d4be1).

* Confuses long long with int64_t, and unsigned long long with
  uint64_t.

* Claims the unsigned conversions can underflow.  They can't.

While there, mark problematic assumptions that int64_t is long long,
and uint64_t is unsigned long long with FIXME comments.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-6-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
bc7c08a2c3 test-cutils: Clean up qemu_strtoul() result checks
Use unsigned comparisons to check the result of qemu_strtoul() and
strtoull().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-5-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
73245450b3 test-cutils: Add missing qemu_strtol()... endptr checks
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <1487708048-2131-4-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
8ee8409eff option: Assert value string isn't null
Plenty of code relies on QemuOpt member @str not being null, including
qemu_opts_print(), qemu_opts_to_qdict(), and callbacks passed to
qemu_opt_foreach().

Begs the question whether it can be null.  Only opt_set() creates
QemuOpt.  It sets member @str to its argument @value.  Passing null
for @value would plant a time bomb.  Callers:

* opts_do_parse() can't pass null.

* qemu_opt_set() passes its argument @value.  Callers:

  - qemu_opts_from_qdict_1() can't pass null

  - qemu_opts_set() passes its argument @value, but none of its
    callers pass null.

  - Many more outside qemu-option.c, but they shouldn't pass null,
    either.

Assert member @str isn't null, so that misuse is caught right away.

Simplify parse_option_bool(), parse_option_number() and
parse_option_size() accordingly.  Best viewed with whitespace changes
ignored.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-3-git-send-email-armbru@redhat.com>
2017-02-23 20:35:35 +01:00
Markus Armbruster
694baf57ae test-qemu-opts: Cover qemu_opts_parse()
The new tests demonstrate a few bugs, all clearly marked.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487708048-2131-2-git-send-email-armbru@redhat.com>
[A few additional test cases squashed in, see
Message-ID: <871supjijq.fsf@dusky.pond.sub.org>]
2017-02-23 20:34:24 +01:00
Peter Maydell
2d896b454a Revert "hw/mips: MIPS Boston board support"
This reverts commit d3473e147a.

This commit creates a board which defaults to having 2GB of RAM.
Unfortunately on 32-bit hosts we can't create boards with 2GB of RAM,
and so 'make check' fails. I missed this during testing of the
merge, unfortunately. Luckily the offending commit is the last
one in the merge request, so we can just revert it for now.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 18:04:45 +00:00
Gerd Hoffmann
4f72b8d2a6 xhci: properties cleanup
Split xhci properties into common and nec specific.

Move the backward compat flags to nec, so the new qemu-xhci
devices doesn't carry on the compatibiity stuff.

Move the msi/msix switches too and just enable msix for qemu-xhci.

Also move the intrs and slots properties.  Wasn't a great idea to
make them configurable in the first place, nobody needs this.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1487663432-10410-1-git-send-email-kraxel@redhat.com
2017-02-23 16:18:03 +01:00
Li Qiang
6ebc069d67 usb: ohci: fix error return code in servicing td
It should return 1 if an error occurs when reading td.
This will avoid an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 1487760990-115925-1-git-send-email-liqiang6-s@360.cn
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-23 16:18:03 +01:00
Marc-André Lureau
c4fe9700e6 usb: replace handle_destroy with unrealize
Curiously, unrealize() is not being used, but it seems more
appropriate than handle_destroy() together with realize(). It is more
ubiquitous destroy name in qemu code base and may throw errors.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170221141451.28305-25-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-23 15:40:19 +01:00
Peter Maydell
10f25e4844 Merge remote-tracking branch 'remotes/yongbok/tags/mips-20170222' into staging
MIPS patches 2017-02-22

Changes:
* Add MIPS Boston board support

# gpg: Signature made Wed 22 Feb 2017 00:08:00 GMT
# gpg:                using RSA key 0x2238EB86D5F797C2
# gpg: Good signature from "Yongbok Kim <yongbok.kim@imgtec.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 8600 4CF5 3415 A5D9 4CFA  2B5C 2238 EB86 D5F7 97C2

* remotes/yongbok/tags/mips-20170222:
  hw/mips: MIPS Boston board support
  hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
  loader: Support Flattened Image Trees (FIT images)
  dtc: Update requirement to v1.4.2
  target-mips: Provide function to test if a CPU supports an ISA
  hw/mips_gic: Update pin state on mask changes
  hw/mips_gictimer: provide API for retrieving frequency
  hw/mips_cmgcr: allow GCR base to be moved

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 09:59:40 +00:00
XiongZhang
c2b2e158cc vfio/pci-quirks.c: Disable stolen memory for igd VFIO
Regardless of running in UPT or legacy mode, the guest igd
drivers may attempt to use stolen memory, however only legacy
mode has BIOS support for reserving stolen memmory in the
guest VM. We zero out the stolen memory size in all cases,
then guest igd driver won't use stolen memory.
In legacy mode, user could use x-igd-gms option to specify the
amount of stolen memory which will be pre-allocated and reserved
by bios for igd use.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028
          https://bugs.freedesktop.org/show_bug.cgi?id=99025

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-22 13:19:59 -07:00
Alex Williamson
d0d1cd70d1 vfio/pci: Improve extended capability comments, skip masked caps
Since commit 4bb571d857 ("pci/pcie: don't assume cap id 0 is
reserved") removes the internal use of extended capability ID 0, the
comment here becomes invalid.  However, peeling back the onion, the
code is still correct and we still can't seed the capability chain
with ID 0, unless we want to muck with using the version number to
force the header to be non-zero, which is much uglier to deal with.
The comment also now covers some of the subtleties of using cap ID 0,
such as transparently indicating absence of capabilities if none are
added.  This doesn't detract from the correctness of the referenced
commit as vfio in the kernel also uses capability ID zero to mask
capabilties.  In fact, we should skip zero capabilities precisely
because the kernel might also expose such a capability at the head
position and re-introduce the problem.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Tested-by: Jintack Lim <jintack@cs.columbia.edu>
2017-02-22 13:19:58 -07:00
Alex Williamson
35c7cb4caf vfio/pci: Report errors from qdev_unplug() via device request
Currently we ignore this error, report it with error_reportf_err()

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2017-02-22 13:19:58 -07:00
Markus Armbruster
7c81e4e9db block: Don't bother asserting type of output visitor's output
After a visit of a complex QAPI type FOO

    ov = qobject_output_visitor_new(&foo);
    visit_type_FOO(ov, NULL, expr, &error_abort);
    visit_complete(ov, &foo);

we can safely assume qobject_type(foo) is QTYPE_QDICT.  We do in many
places, but occasionally assert qobject_type(obj) == QTYPE_QDICT.
Don't.  The appropriate place to check such fundamental properties of
QAPI visitors is the test suite.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-15-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:20 +01:00
Markus Armbruster
bbf1028a0a monitor: Clean up handle_hmp_command() a bit
Leave checking qobject_type(req) to qmp_check_input_obj().  Rework
handling of json_parser_parse_err() failing without setting an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-14-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:17 +01:00
Markus Armbruster
dfad9ec4e9 tests: Don't check qobject_type() before qobject_to_qbool()
qobject_to_qbool(obj) returns NULL when obj isn't a QBool.  Check
that instead of qobject_type(obj) == QTYPE_QBOOL.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-13-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:14 +01:00
Markus Armbruster
8978b34af3 tests: Don't check qobject_type() before qobject_to_qfloat()
qobject_to_qfloat(obj) returns NULL when obj isn't a QFloat.  Check
that instead of qobject_type(obj) == QTYPE_QFLOAT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-12-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:11 +01:00
Markus Armbruster
0abfc4b885 tests: Don't check qobject_type() before qobject_to_qint()
qobject_to_qint(obj) returns NULL when obj isn't a QInt.  Check
that instead of qobject_type(obj) == QTYPE_QINT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-11-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:09 +01:00
Markus Armbruster
363e13f86e tests: Don't check qobject_type() before qobject_to_qstring()
qobject_to_qstring(obj) returns NULL when obj isn't a QString.  Check
that instead of qobject_type(obj) == QTYPE_QSTRING.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-10-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:06 +01:00
Markus Armbruster
cd17ba51f5 tests: Don't check qobject_type() before qobject_to_qlist()
qobject_to_qlist(obj) returns NULL when obj isn't a QList.  Check
that instead of qobject_type(obj) == QTYPE_QLIST.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-9-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:04 +01:00
Markus Armbruster
ca6b6e1e68 Don't check qobject_type() before qobject_to_qdict()
qobject_to_qdict(obj) returns NULL when obj isn't a QDict.  Check
that instead of qobject_type(obj) == QTYPE_QDICT.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-8-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:52:01 +01:00
Markus Armbruster
4b32e11a59 test-qmp-event: Simplify and tighten event_test_emit()
Use qdict_get_qdict() and qdict_get_try_int() to simplify.

While there, add a sanity check for seconds.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-7-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:59 +01:00
Markus Armbruster
4d96f329cc libqtest: Clean up qmp_response() a bit
Use qobject_to_qdict() instead of a type cast.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-6-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:56 +01:00
Markus Armbruster
9eaaf97168 check-qjson: Simplify around compare_litqobj_to_qobj()
Make compare_litqobj_to_qobj() cope with null, and drop non-null
assertions from callers.

compare_litqobj_to_qobj() already checks the QType matches; drop the
redundant assertions from callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-5-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:54 +01:00
Markus Armbruster
a68931ea5f check-qdict: Tighten qdict_crumple_test_recursive() some
Consistently check for unexpected QDict entries, and qdict_get_qdict()
success.  The latter doesn't tighten the test, it only makes it fail
more nicely.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-4-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:51 +01:00
Markus Armbruster
ff9d38963e check-qdict: Simplify qdict_crumple_test_recursive()
Use qdict_get_qdict(), qdict_get_qlist() instead of qdict_get()
followed by qobject_to_qdict(), qobject_to_qlist().

While there, drop some redundant code.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-3-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:48 +01:00
Markus Armbruster
b25f23e7db qdict: Make qdict_get_qlist() safe like qdict_get_qdict()
Commit 89cad9f changed qdict_get_qdict() to return NULL instead of
crash when the key doesn't exist or its value isn't a QDict.
Commit 2d6421a neglected to do the same for qdict_get_qlist().
Correct that, and update the function comments.

qdict_get_obj() is now unused, remove.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487363905-9480-2-git-send-email-armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2017-02-22 19:51:37 +01:00
Markus Armbruster
d3be4b57ce net: Flatten simple union NetLegacyOptions
Simple unions are simpler than flat unions in the schema, but more
complicated in C and on the QMP wire: there's extra indirection in C
and extra nesting on the wire, both pointless.  They're best avoided
in new code.

NetLegacyOptions isn't new, but it's only used internally, not in QMP.
Convert it to a flat union.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487709988-14322-3-git-send-email-armbru@redhat.com>
2017-02-22 19:50:52 +01:00
Markus Armbruster
d081a49af8 numa: Flatten simple union NumaOptions
Simple unions are simpler than flat unions in the schema, but more
complicated in C and on the QMP wire: there's extra indirection in C
and extra nesting on the wire, both pointless.  They're best avoided
in new code.

NumaOptions isn't new, but it's only used internally, not in QMP.
Convert it to a flat union.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487709988-14322-2-git-send-email-armbru@redhat.com>
2017-02-22 19:50:46 +01:00
Peter Maydell
fb6971c110 hw/ppc/ppc405_uc.c: Avoid integer overflows
When performing clock calculations, the ppc405_uc code
has several places where it multiplies together two
32-bit variables and assigns the result to a 64-bit
variable. This doesn't quite do what is intended because
C will compute a 32-bit multiply result. Add casts to
ensure we don't truncate the result.

(Spotted by Coverity, CID 1005504, 1005505.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Thomas Huth
df58713396 hw/ppc/spapr: Check for valid page size when hot plugging memory
On POWER, the valid page sizes that the guest can use are bound
to the CPU and not to the memory region. QEMU already has some
fancy logic to find out the right maximum memory size to tell
it to the guest during boot (see getrampagesize() in the file
target/ppc/kvm.c for more information).
However, once we're booted and the guest is using huge pages
already, it is currently still possible to hot-plug memory regions
that does not support huge pages - which of course does not work
on POWER, since the guest thinks that it is possible to use huge
pages everywhere. The KVM_RUN ioctl will then abort with -EFAULT,
QEMU spills out a not very helpful error message together with
a register dump and the user is annoyed that the VM unexpectedly
died.
To avoid this situation, we should check the page size of hot-plugged
DIMMs to see whether it is possible to use it in the current VM.
If it does not fit, we can print out a better error message and
refuse to add it, so that the VM does not die unexpectely and the
user has a second chance to plug a DIMM with a matching memory
backend instead.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1419466
Signed-off-by: Thomas Huth <thuth@redhat.com>
[dwg: Fix a build error on 32-bit builds with KVM]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Alex Zuepke
0a4c774086 target-ppc: fix Book-E TLB matching
The Book-E TLB matching process should bail out early when a TLB
entry matches, but the access permissions are wrong. The CPU
will then raise a DSI error instead of a Data TLB error, as
described for TLB matching in Freescale and IBM documents.

Signed-off-by: Alex Zuepke <azu@sysgo.de>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Sam Bobroff
87684b4c40 hw/net/spapr_llan: 6 byte mac address device tree entry
The spapr-vlan device in QEMU has always presented it's MAC address in
the device tree as an 8 byte value, even though PAPR requires it to be
6 bytes.  This is because, at the time, AIX required the value to be 8
bytes.  However, modern versions of AIX support the (correct) 6
byte value so they no longer require the workaround.

It would be neatest to always provide a 6 byte value but that would
cause a problem with old Linux kernel ibmveth drivers, so the old 8
byte value is still presented when necessary.

Since commit 13f85203e (3.10, May 2013) the driver has been able to
handle 6 or 8 byte addresses so versions after that don't need to be
considered specially.

Drivers from kernels before that can also handle either type of
address, but not always:
* If the first byte's lowest bits are 10, the address must be 6 bytes.
* Otherwise, the address must be 8 bytes.
(The two bits in question are significant in a MAC address: they
indicate a locally-administered unicast address.)

So to maintain compatibility the old 8 byte value is presented when
the lowest two bits of the first byte are not 10.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 14:28:53 +11:00
Igor Mammedov
c5514d0e4b machine: replace query_hotpluggable_cpus() callback with has_hotpluggable_cpus flag
Generic helper machine_query_hotpluggable_cpus() replaced
target specific query_hotpluggable_cpus() callbacks so
there is no need in it anymore. However inon NULL callback
value is used to detect/report hotpluggable cpus support,
therefore it can be removed completely.
Replace it with MachineClass.has_hotpluggable_cpus boolean
which is sufficient for the task.

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
f2d672c248 machine: unify [pc_|spapr_]query_hotpluggable_cpus() callbacks
All callbacks FOO_query_hotpluggable_cpus() are practically
the same except of setting vcpus_count to different values.
Convert them to a generic machine_query_hotpluggable_cpus()
callback by moving vcpus_count initialization to per machine
specific callback possible_cpu_arch_ids().

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
535455fdee spapr: reuse machine->possible_cpus instead of cores[]
Replace SPAPR specific cores[] array with generic
machine->possible_cpus and store core objects there.
It makes cores bookkeeping similar to x86 cpus and
will allow to unify similar code.
It would allow to replace cpu_index based NUMA node
mapping with iproperty based one (for -device created
cores) since possible_cpus carries board defined
topology/layout.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
8aba384298 change CPUArchId.cpu type to Object*
so it could be reused for SPAPR cores as well

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
1ea69c0e25 pc: pass apic_id to pc_find_cpu_slot() directly so lookup could be done without CPU object
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
c67ae9333c pc: calculate topology only once when possible_cpus is initialised
Fill in CpuInstanceProperties once at board init time and
just copy them whenever query_hotpluggable_cpus() is called.
It will keep topology info always available without need
to recalculate it every time it's needed.
Considering it has NUMA node id, it will be used to keep
NUMA node to cpu mapping instead of numa_info[i].node_cpu
bitmasks.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
c96a1c0ba6 pc: move pcms->possible_cpus init out of pc_cpus_init()
possible_cpus could be initialized earlier then cpu objects,
i.e. when -smp is parsed so move init code to possible_cpu_arch_ids()
interface func and do initialization on the first call.

it should help later with making -numa cpu/-smp parsing a machine state
properties.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Igor Mammedov
38690a1ca7 machine: move possible_cpus to MachineState
so that it would be possible to reuse it with
spapr/virt-aarch64 targets.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Thomas Huth
fb38ebfbfe hw/pci-host/prep: Do not use hw_error() in realize function
hw_error() is for CPU related errors only (it prints out a
register dump and calls abort()), so we should not use it
if we just failed to load the bios image. Apart from that,
realize() functions should not exit directly but always set
the errp with error_setg() in case of errors instead.
Additionally, move some code around and delete the bios memory
subregion again in case of such an error, so that we leave a
clean state when returning to the caller.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Hervé Poussineau <hpoussin@reactos.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
5065908361 target/ppc/POWER9: Direct all instr and data storage interrupts to the hypv
The vpm0 bit was removed from the LPCR in POWER9, this bit controlled
whether ISI and DSI interrupts were directed to the hypervisor or the
partition. These interrupts now go to the hypervisor irrespective, thus
it is no longer necessary to check the vmp0 bit in the LPCR.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
18aa49ecf4 target/ppc/POWER9: Adapt LPCR handling for POWER9
The logical partitioning control register controls a threads operation
based on the partition it is currently executing. Add new definitions and
update the mask used when writing to the LPCR based on the POWER9 spec.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
86cf1e9fe8 target/ppc/POWER9: Add ISAv3.00 MMU definition
POWER9 processors implement the mmu as defined in version 3.00 of the ISA.

Add a definition for this mmu model and set the POWER9 cpu model to use
this mmu model.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Suraj Jitindar Singh
7659ca1a3e target/ppc: Fix LPCR DPFD mask define
The DPFD field in the LPCR is 3 bits wide. This has always been defined
as 0x3 << shift which indicates a 2 bit field, which is incorrect.
Correct this.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
e0aee726bf target-ppc: Add xscvqpudz and xscvqpuwz instructions
xscvqpudz: VSX Scalar truncate & Convert Quad-Precision format to
           Unsigned Doubleword format
xscvqpuwz: VSX Scalar truncate & Convert Quad-Precision format to
           Unsigned Word format

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
a8d411abac target-ppc: Implement round to odd variants of quad FP instructions
xsaddqpo:  VSX Scalar Add Quad-Precision using round to Odd
xsmulqo:   VSX Scalar Multiply Quad-Precision using round to Odd
xsdivqpo:  VSX Scalar Divide Quad-Precision using round to Odd
xscvqpdpo: VSX Scalar round & Convert Quad-Precision format to
           Double-Precision format using round to Odd
xssqrtqpo: VSX Scalar Square Root Quad-Precision using round to Odd
xssubqpo:  VSX Scalar Subtract Quad-Precision using round to Odd

In addition, fix the invalid bitmask in the instruction encoding
of xssqrtqp[o].

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
fd425037d2 softfloat: Add float128_to_uint32_round_to_zero()
float128_to_uint32_round_to_zero() is needed by xscvqpuwz instruction
of PowerPC ISA 3.0.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
2e6d856835 softfloat: Add float128_to_uint64_round_to_zero()
Implement float128_to_uint64() and use that to implement
float128_to_uint64_round_to_zero()

This is required by xscvqpudz instruction of PowerPC ISA 3.0.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Bharata B Rao
9ee6f678f4 softfloat: Add round-to-odd rounding mode
Power ISA 3.0 introduces a few quadruple precision floating point
instructions that support round-to-odd rounding mode. The
round-to-odd mode is explained as under:

Let Z be the intermediate arithmetic result or the operand of a convert
operation. If Z can be represented exactly in the target format, the
result is Z. Otherwise the result is either Z1 or Z2 whichever is odd.
Here Z1 and Z2 are the next larger and smaller numbers representable
in the target format respectively.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
5b929608b9 spapr: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
f4af7d4438 ppc4xx: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Laurent Vivier
5283c27fc5 mac99: replace debug printf with trace points
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Sam Bobroff
2635531f20 target-ppc, tcg: fix usermode segfault with pthread_create()
Programs run under qemu-ppc64 on an x86_64 host currently segfault
if they use pthread_create() due to the adjustment made to the NIP in
commit bd6fefe71c.

This patch changes cpu_loop() to set the NIP back to the
pre-incremented value before calling do_syscall(), which causes the
correct address to be used for the new thread and corrects the fault.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
c09cec683b target-ppc: add wait instruction
Use the available wait instruction implementation.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
62d897ca8b target-ppc: add slbsync implementation
slbsync: SLB Synchoronize

The instruction provides an ordering function for the effects of all
slbieg instructions executed by the thread executing the slbsync
instruction.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
a63f1dfc62 target-ppc: add slbieg instruction
slbieg: SLB Invalidate Entry Global

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Nikunj A Dadhania
80b8c1ee05 target-ppc: generate exception for copy/paste
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:28 +11:00
Balamuruhan S
a34011881c target-ppc: implement store atomic instruction
stwat: Store Word Atomic
stdat: Store Doubleword Atomic

The instruction includes as function code (5 bits) which gives a detail
on the operation to be performed. The patch implements five such
functions.

Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Harish S <harisrir@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[ implement stdat, use macro and combine both implementation ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Balamuruhan S
a68a614673 target-ppc: implement load atomic instruction
lwat: Load Word Atomic
ldat: Load Doubleword Atomic

The instruction includes as function code (5 bits) which gives a detail
on the operation to be performed. The patch implements five such
functions.

Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
Signed-off-by: Harish S <harisrir@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[ combine both lwat/ldat implementation using macro ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Sam Bobroff
fe93e3e6ec spapr: fix off-by-one error in spapr_ovec_populate_dt()
The last byte of the option vector was missing due to an off-by-one
error. Without this fix, client architecture support negotiation will
fail because the last byte of option vector 5, which contains the MMU
support, will be missed.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Bharata B Rao
d4ccd87e68 target-ppc: Add xsmaxjdp and xsminjdp instructions
xsmaxjdp: VSX Scalar Maximum Type-J Double-Precision
xsminjdp: VSX Scalar Minimum Type-J Double-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Bharata B Rao
2770deede0 target-ppc: Add xsmaxcdp and xsmincdp instructions
xsmaxcdp: VSX Scalar Maximum Type-C Double-Precision
xsmincdp: VSX Scalar Minimum Type-C Double-Precision

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Thomas Huth
802fc7abd0 hw/ppc/pnv: Remove superfluous "qemu" prefix from error strings
error_report() already puts a prefix with the program name in front
of the error strings, so the "qemu:" prefix is not necessary here
anymore.

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
f6b99afdc3 ppc: implement xssubqp instruction
xssubqp: VSX Scalar Subtract Quad-Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
a4a68476de ppc: implement xssqrtqp instruction
xssqrtqp: VSX Scalar Square Root Quad-Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
917950d7f5 ppc: implement xsrqpxp instruction
xsrqpxp: VSX Scalar Round Quad-Precision to Double-Extended Precision.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Jose Ricardo Ziviani
be07ad5842 ppc: implement xsrqpi[x] instruction
xsrqpi[x]: VSX Scalar Round to Quad-Precision Integer
[with Inexact].

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
115debf26c spapr: make cpu core unplug follow expected hotunplug call flow
spapr_core_unplug() were essentially spapr_core_unplug_request()
handler that requested CPU removal and registered callback
which did actual cpu core removali but it was called from
spapr_machine_device_unplug() which is intended for actual object
removal. Commit (cf632463 spapr: Memory hot-unplug support)
sort of fixed it introducing spapr_machine_device_unplug_request()
and calling spapr_core_unplug() but it hasn't renamed callback and
by mistake calls it from spapr_machine_device_unplug().

However spapr_machine_device_unplug() isn't ever called for
cpu core since spapr_core_release() doesn't follow expected
hotunplug call flow which is:
 1: device_del() ->
        hotplug_handler_unplug_request() ->
            set destroy_cb()
 2: destroy_cb() ->
        hotplug_handler_unplug() ->
            object_unparent // actual device removal

Fix it by renaming spapr_core_unplug() to spapr_core_unplug_request()
which is called from spapr_machine_device_unplug_request() and
making spapr_core_release() call hotplug_handler_unplug() which
will call spapr_machine_device_unplug() -> spapr_core_unplug()
to remove cpu core.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reveiwed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
ff9006ddbf spapr: move spapr_core_[foo]plug() callbacks close to machine code in spapr.c
spapr_core_pre_plug/spapr_core_plug/spapr_core_unplug() are managing
wiring CPU core into spapr machine state and not internal CPU core state.
So move them from spapr_cpu_core.c to spapr.c where other similar
(spapr_memory_[foo]plug()) callbacks are located, which also matches
x86 target practice.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Igor Mammedov
f844616bf6 spapr: cpu core: separate child threads destruction from machine state operations
Split off destroying VCPU threads from drc callback
spapr_core_release() into new spapr_cpu_core_unrealizefn()
which takes care of internal cpu core state cleanup (i.e.
VCPU threads) and is called when object_unparent(core)
is called.

That leaves spapr_core_release() only with board mgmt
code, which will be moved to board related file in
follow up patch along with the rest on hotplug callbacks.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-22 11:28:27 +11:00
Paul Burton
d3473e147a hw/mips: MIPS Boston board support
Introduce support for emulating the MIPS Boston development board. The
Boston board is built around an FPGA & 3 PCIe controllers, one of which
is connected to an Intel EG20T Platform Controller Hub. It is used
during the development & debug of new CPUs and the software intended to
run on them, and is essentially the successor to the older MIPS Malta
board.

This patch does not implement the EG20T, instead connecting an already
supported ICH-9 AHCI controller. Whilst this isn't accurate it's enough
for typical stock Boston software (eg. Linux kernels) to work with hard
disks given that both the ICH-9 & EG20T implement the AHCI
specification.

Boston boards typically boot kernels in the FIT image format, and this
patch will treat kernels provided to QEMU as such. When loading a kernel
directly, the board code will generate minimal firmware much as the
Malta board code does. This firmware will set up the CM, CPC & GIC
register base addresses then set argument registers & jump to the kernel
entry point. Alternatively, bootloader code may be loaded using the bios
argument in which case no firmware will be generated & execution will
proceed from the start of the boot code at the default MIPS boot
exception vector (offset 0x1fc00000 into (c)kseg1).

Currently real Boston boards are always used with FPGA bitfiles that
include a Global Interrupt Controller (GIC), so the interrupt
configuration is only defined for such cases. Therefore the board will
only allow use of CPUs which implement the CPS components, including the
GIC, and will otherwise exit with a message.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
[yongbok.kim@imgtec.com:
  isolated boston machine support for mips64el.
  updated for recent Chardev changes.
  ignore missing bios/kernel for qtest.
  added default -drive to if=ide explicitly]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:30 +00:00
Paul Burton
62be393423 hw: xilinx-pcie: Add support for Xilinx AXI PCIe Controller
Add support for emulating the Xilinx AXI Root Port Bridge for PCI
Express as described by Xilinx' PG055 document. This is a PCIe
controller that can be used with certain series of Xilinx FPGAs, and is
used on the MIPS Boston board which will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  removed returning on !level,
  updated IRQ connection with GPIO logic,
  moved xilinx_pcie_init() to boston.c
  replaced stw_le_p() with pci_set_word()
  and other cosmetic changes]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:49:29 +00:00
Paul Burton
51b58561c1 loader: Support Flattened Image Trees (FIT images)
Introduce support for loading Flattened Image Trees, as used by modern
U-Boot. FIT images are essentially flattened device tree files which
contain binary images such as kernels, FDTs or ramdisks along with one
or more configuration nodes describing boot configurations.

The MIPS Boston board typically boots kernels in the form of FIT images,
and will make use of this code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
[yongbok.kim@imgtec.com:
  fixed potential memory leaks,
  isolated building option]
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 23:47:40 +00:00
Paul Burton
6e85fce022 dtc: Update requirement to v1.4.2
In order to obtain fdt_first_subnode & fdt_next_subnode symbols from
libfdt for use by a later patch, bump the requirement for dtc to v1.4.2
& the submodule to that same version.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
bed9e5ceb1 target-mips: Provide function to test if a CPU supports an ISA
Provide a new cpu_supports_isa function which allows callers to
determine whether a CPU supports one of the ISA_ flags, by testing
whether the associated struct mips_def_t sets the ISA flags in its
insn_flags field.

An example use of this is to allow boards which generate bootloader code
to determine the properties of the CPU that will be used, for example
whether the CPU is 64 bit or which architecture revision it implements.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
2e2a1b4648 hw/mips_gic: Update pin state on mask changes
If the GIC interrupt mask is changed by a write to the smask (set mask)
or rmask (reset mask) registers, we need to re-evaluate the state of the
pins/IRQs fed to the CPU. Without doing so we risk leaving a pin high
despite the interrupt that led to that state being masked, or losing
interrupts if an already pending interrupt is unmasked.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
eb90ab9437 hw/mips_gictimer: provide API for retrieving frequency
Provide a new function mips_gictimer_get_freq() which returns the
frequency at which a GIC timer will count. This will be useful for
boards which perform setup based upon this frequency.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Paul Burton
08944be1d9 hw/mips_cmgcr: allow GCR base to be moved
Support moving the GCR base address & updating the CPU's CP0 CMGCRBase
register appropriately. This is required if a platform needs to move its
GCRs away from other memory, as the MIPS Boston development board does
to avoid its flash memory.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
2017-02-21 22:24:58 +00:00
Peter Maydell
e295a154c2 Merge remote-tracking branch 'remotes/dgilbert/tags/pull-hmp-20170221' into staging
HMP pull

Note, I had seen a fail in the vhost-user/flags-mismatch on one
host in one build, but not others with the same patches; and these patches
go nowhere near that, so I think that's a separate vhost-user issue.

# gpg: Signature made Tue 21 Feb 2017 18:49:25 GMT
# gpg:                using RSA key 0x0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-hmp-20170221:
  monitor: Fix crashes when using HMP commands without CPU
  monitor: add poll-* properties into query-iothreads result
  hmp: fix block_set_io_throttle

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 19:41:57 +00:00
Thomas Huth
854e67fea6 monitor: Fix crashes when using HMP commands without CPU
When running certain HMP commands ("info registers", "info cpustats",
"info tlb", "nmi", "memsave" or dumping virtual memory) with the "none"
machine, QEMU crashes with a segmentation fault. This happens because the
"none" machine does not have any CPUs by default, but these HMP commands
did not check for a valid CPU pointer yet. Add such checks now, so we get
an error message about the missing CPU instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1484309555-1935-1-git-send-email-thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Pavel Hrdina
5fc00480ab monitor: add poll-* properties into query-iothreads result
IOthreads were recently extended by new properties that can
enable/disable and configure aio polling.  This will also allow
other tools that uses QEMU to probe for existence of those new
properties via query-qmp-schema.

Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Message-Id: <3163c16d6ab4257f7be9ad44fe9cc0ce8c359e5a.1486718555.git.phrdina@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:01 +00:00
Eric Blake
3f35c3b166 hmp: fix block_set_io_throttle
Commit 7a9877a made the 'device' parameter to BlockIOThrottle
optional, favoring 'id' instead.  But it forgot to update the
HMP usage to set has_device, which makes all attempts to change
throttling via HMP fail with "Need exactly one of 'device' and 'id'"

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170120230359.4244-1-eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2017-02-21 18:29:00 +00:00
Peter Maydell
796b288f7b Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 21 Feb 2017 15:40:05 GMT
# gpg:                using RSA key 0xBDBE7B27C0DE3057
# gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>"
# gpg:                 aka "Jeffrey Cody <jeff@codyprime.org>"
# gpg:                 aka "Jeffrey Cody <codyprime@gmail.com>"
# Primary key fingerprint: 9957 4B4D 3474 90E7 9D98  D624 BDBE 7B27 C0DE 3057

* remotes/cody/tags/block-pull-request:
  qemu-options: Fix broken sheepdog URL
  mirror: do not increase offset during initial zero_or_discard phase
  QAPI: Fix blockdev-add example documentation
  iscsi: Add blockdev-add support
  iscsi: Add timeout option
  iscsi: Add header-digest option
  iscsi: Add initiator-name option
  iscsi: Handle -iscsi user/password in bdrv_parse_filename()
  iscsi: Split URL into individual options

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 15:48:22 +00:00
Thomas Huth
6135c5e126 qemu-options: Fix broken sheepdog URL
The sheepdog URL is broken twice: First it uses a duplicated
http:// prefix, second the website seems to have moved to
https://sheepdog.github.io/sheepdog/ instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:09 -05:00
Anton Nefedov
90ab48eb07 mirror: do not increase offset during initial zero_or_discard phase
If explicit zeroing out before mirroring is required for the target image,
it moves the block job offset counter to EOF, then offset and len counters
count the image size twice. There is no harm but stats are confusing,
specifically the progress of the operation is always reported as 99% by
management tools.

The patch skips offset increase for the first "technical" pass over the
image. This should not cause any further harm.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1486045515-8009-1-git-send-email-den@openvz.org
CC: Jeff Cody <jcody@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Eric Blake <eblake@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:38:00 -05:00
Jeff Cody
b166099712 QAPI: Fix blockdev-add example documentation
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:46 -05:00
Kevin Wolf
31eb1202d3 iscsi: Add blockdev-add support
This adds blockdev-add support for iscsi devices.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:34 -05:00
Kevin Wolf
1d56010482 iscsi: Add timeout option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if an URL is given. New users are
supposed to use the new driver-specific option.

All -iscsi options have a corresponding driver-specific option for the
iscsi block driver now.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:26 -05:00
Kevin Wolf
81aa2a0fb5 iscsi: Add header-digest option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if an URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:16 -05:00
Kevin Wolf
d4e799292c iscsi: Add initiator-name option
This was previously only available with -iscsi. Again, after this patch,
the -iscsi option only takes effect if an URL is given. New users are
supposed to use the new driver-specific option.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:37:08 -05:00
Kevin Wolf
4317142020 iscsi: Handle -iscsi user/password in bdrv_parse_filename()
This splits the logic in the old parse_chap() function into a part that
parses the -iscsi options into the new driver-specific options, and
another part that actually applies those options (called apply_chap()
now).

Note that this means that username and password specified with -iscsi
only take effect when a URL is provided. This is intentional, -iscsi is
a legacy interface only supported for compatibility, new users should
use the proper driver-specific options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:57 -05:00
Kevin Wolf
d5895fcb1d iscsi: Split URL into individual options
This introduces a .bdrv_parse_filename handler for iscsi which parses an
URL if given and translates it to individual options.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
2017-02-21 10:36:34 -05:00
Peter Maydell
a1cf5fac2b Merge remote-tracking branch 'remotes/armbru/tags/pull-block-2017-02-21' into staging
Changes to -drive without if= and with if=scsi

# gpg: Signature made Tue 21 Feb 2017 12:22:35 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-block-2017-02-21:
  hw/i386: Deprecate -drive if=scsi with PC machine types
  hw: Deprecate -drive if=scsi with non-onboard HBAs
  hw/scsi: Concentrate -drive if=scsi auto-create in one place
  hw: Drop superfluous special checks for orphaned -drive
  blockdev: Make orphaned -drive fatal
  blockdev: Improve message for orphaned -drive
  hw/arm/highbank: Default -drive to if=ide instead of if=scsi
  hw: Default -drive to if=none instead of scsi when scsi cannot work
  hw: Default -drive to if=none instead of ide when ide cannot work
  hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
  hw: Default -drive to if=ide explicitly where it works

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 13:58:50 +00:00
Markus Armbruster
f778a82f0c hw/i386: Deprecate -drive if=scsi with PC machine types
The PC machines (pc-q35-* pc-i440fx-* pc-* isapc xenfv) automatically
create lsi53c895a SCSI HBAs and SCSI devices to honor -drive if=scsi.
For giggles, try -drive if=scsi,bus=25,media=cdrom --- this makes QEMU
create 25 of them.

lsi53c895a is thoroughly obsolete (PCI Ultra2 SCSI, ca. 2000), and
currently has no maintainer in QEMU.  megasas is a better choice,
except with old OSes that lack drivers.  virtio-scsi is a much better
choice when you have a driver, but only (newish) Linux comes with one
in the box.  There is no good default that works for all guests.

Encourage users to pick a non-obsolete SCSI HBA that works for them by
deprecating -drive if=scsi.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-4-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
a64aa5785d hw: Deprecate -drive if=scsi with non-onboard HBAs
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

Drives defined with if=scsi are also picked up by SCSI HBAs added with
-device, unlike other interface types.  Deprecate this usage, as follows.

Create the frontends for onboard HBAs in machine initialization code,
exactly like we do for if=ide and other interface types.  Change
scsi_legacy_handle_cmdline() to create a frontend only when it's still
missing, and warn that this usage is deprecated.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-3-git-send-email-armbru@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
fb8b660e17 hw/scsi: Concentrate -drive if=scsi auto-create in one place
The logic to create frontends for -drive if=scsi is in SCSI HBAs.  For
all other interface types, it's in machine initialization code.

A few machine types create the SCSI HBAs necessary for that.  That's
also not done for other interface types.

I'm going to deprecate these SCSI eccentricities.  In preparation for
that, create the frontends in main() instead of the SCSI HBAs, by
calling new function scsi_legacy_handle_cmdline() there.

Note that not all SCSI HBAs create frontends.  Take care not to change
that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487161136-9018-2-git-send-email-armbru@redhat.com>
Acked-By: Paolo Bonzini <pbonzini@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
8f2d75e81d hw: Drop superfluous special checks for orphaned -drive
We've traditionally rejected orphans here and there, but not
systematically.  For instance, the sun4m machines have an onboard SCSI
HBA (bus=0), and have always rejected bus>0.  Other machines with an
onboard SCSI HBA don't.

Commit a66c9dc made all orphans trigger a warning, and the previous
commit turned this into an error.  The checks "here and there" are now
redundant.  Drop them.

Note that the one in mips_jazz.c was wrong: it rejected bus > MAX_FD,
but MAX_FD is the number of floppy drives per bus.

Error messages change from

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: Too many IDE buses defined (3 > 2)
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu: too many floppy drives
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu: too many SCSI bus

to

    $ qemu-system-x86_64 -drive if=ide,bus=2
    qemu-system-x86_64: -drive if=ide,bus=2: machine type does not support if=ide,bus=2,unit=0
    $ qemu-system-mips64 -M magnum,accel=qtest -drive if=floppy,bus=2,id=fd1
    qemu-system-mips64: -drive if=floppy,bus=2,id=fd1: machine type does not support if=floppy,bus=2,unit=0
    $ qemu-system-sparc -M LX -drive if=scsi,bus=1
    qemu-system-sparc: -drive if=scsi,bus=1: machine type does not support if=scsi,bus=1,unit=0

Cc: John Snow <jsnow@redhat.com>
Cc: "Hervé Poussineau" <hpoussin@reactos.org>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-9-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
720b8dc052 blockdev: Make orphaned -drive fatal
Block backends defined with "-drive if=T" with T other than "none" are
meant to be picked up by machine initialization code: a suitable
frontend gets created and wired up automatically.

If machine initialization code doesn't comply, the block backend
remains unused.  This triggers a warning since commit a66c9dc, v2.2.0.
Drives created by default are exempted; use -nodefaults to get rid of
them.

Turn this warning into an error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-8-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:45 +01:00
Markus Armbruster
664cc623bf blockdev: Improve message for orphaned -drive
We warn when a -drive isn't supported by the machine type (commit
a66c9dc):

    $ qemu-system-x86_64 -S -display none -drive if=mtd
    Warning: Orphaned drive without device: id=mtd0,file=,if=mtd,bus=0,unit=0

Improve this to point to the offending bit of configuration:

    qemu-system-x86_64: -drive if=mtd: warning: machine type does not support if=mtd,bus=0,unit=0

Especially nice when it's hidden behind -readconfig foo.cfg:

    qemu-system-x86_64:foo.cfg:140: warning: machine type does not support if=mtd,bus=0,unit=0

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1487153147-11530-7-git-send-email-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2017-02-21 13:17:40 +01:00
Markus Armbruster
2a7ae4ee50 hw/arm/highbank: Default -drive to if=ide instead of if=scsi
These machines have no onboard SCSI HBA, and no way to plug one.
-drive if=scsi therefore cannot work.  They do have an onboard IDE
controller (sysbus-ahci), but fail to honor if=ide.

Change their default to if=ide, and add a TODO comment on what needs
to be done to actually honor -drive if=ide.

Cc: Rob Herring <robh@kernel.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-6-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
7e465513c1 hw: Default -drive to if=none instead of scsi when scsi cannot work
Block backends defined with -drive if=scsi are meant to be picked up
by machine initialization code: a suitable frontend gets created and
wired up automatically.

if=scsi drives not picked up that way can still be used with -device
as if they had if=none, but that's unclean and best avoided.  Unused
ones produce an "Orphaned drive without device" warning.

A few machine types default to if=scsi, even though they don't
actually have a SCSI HBA.  This makes no sense.  Change their default
to if=none.  Affected machines:

* aarch64/arm: realview-pbx-a9 vexpress-a9 vexpress-a15 xilinx-zynq-a9

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-5-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
a27fa28f03 hw: Default -drive to if=none instead of ide when ide cannot work
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types implicitly default to if=ide that way, even though
they don't actually have an IDE controller.  This makes no sense.

Change the implicit default to if=none.  Affected machines:

* all targets: none
* aarch64/arm: akita ast2500 canon cheetah collie connex imx25
  integratorcp kzm lm3s6965evb lm3s811evb mainstone musicpal n800 n810
  netduino2 nuri palmetto realview romulus sabrelite smdkc210 sx1 sx1
  verdex z2
* cris: axis-dev88
* i386/x86_64: xenpv
* lm32: lm32-evr lm32-uclinux milkymist
* m68k: an5206 dummy mcf5208evb
* microblaze/microblazeel: petalogix-ml605 petalogix-s3adsp1800
* mips/mips64/mips64el/mipsel: mipssim
* moxie: moxiesim
* or32: or32-sim
* ppc/ppc64/ppcemb: bamboo ref405ep taihu virtex-ml507
* ppc/ppc64: mpc8544ds ppce500
* sh4/sh4eb: shix
* sparc: leon3_generic
* sparc64: niagara
* tricore: tricore_testboard
* unicore32: puv3
* xtensa/xtensaeb: kc705 lx200 lx60 ml605 sim

None of these machines have an IDE controller, let alone code to
honor if=ide.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Cc: Edgar E. Iglesias <edgar.iglesias@gmail.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: xen-devel@lists.xensource.com
Cc: Michael Walle <michael@walle.cc>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Anthony Green <green@moxielogic.com>
Cc: Jia Liu <proljc@gmail.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: qemu-ppc@nongnu.org
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Fabien Chouteau <chouteau@adacore.com>
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Cc: Artyom Tarasenko <atar4qemu@gmail.com>
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-By: Artyom Tarasenko <atar4qemu@gmail.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-4-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
e0319b0302 hw/arm/cubieboard hw/arm/xlnx-ep108: Fix units_per_default_bus
Machine types cubieboard, xlnx-ep108, xlnx-zcu102 have an onboard AHCI
controller, but neglect to set their MachineClass member
units_per_default_bus = 1.  This permits -drive if=ide,unit=1, which
makes no sense for AHCI.  It also screws up index=N for odd N, because
it gets desugared to unit=1,bus=N/2

Doesn't really matter, because these machine types fail to honor
-drive if=ide.  Add the missing units_per_default_bus = 1 anyway,
along with a TODO comment on what needs to be done for -drive if=ide.

Also set block_default_type = IF_IDE explicitly.  It's currently the
default, but the next commit will change it to something more
sensible, and we want to keep the IF_IDE default for these three
machines.  See also the previous commit.

Cc: Beniamino Galvani <b.galvani@gmail.com>
Cc: Alistair Francis <alistair.francis@xilinx.com>
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-arm@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Alistair Francis <alistair.francis@xilinx.com>
Message-Id: <1487153147-11530-3-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Markus Armbruster
2059839baa hw: Default -drive to if=ide explicitly where it works
Block backends defined with -drive if=ide are meant to be picked up by
machine initialization code: a suitable frontend gets created and
wired up automatically.

if=ide drives not picked up that way can still be used with -device as
if they had if=none, but that's unclean and best avoided.  Unused ones
produce an "Orphaned drive without device" warning.

-drive parameter "if" is optional, and the default depends on the
machine type.  If a machine type doesn't specify a default, the
default is "ide".

Many machine types default to if=ide, even though they don't actually
have an IDE controller.  A future patch will change these defaults to
something more sensible.  To prepare for it, this patch makes default
"ide" explicit for the machines that actually pick up if=ide drives:

* alpha: clipper
* arm/aarch64: spitz borzoi terrier tosa
* i386/x86_64: generic-pc-machine (with concrete subtypes pc-q35-*
  pc-i440fx-* pc-* isapc xenfv)
* mips64el: fulong2e
* mips/mipsel/mips64el: malta mips
* ppc/ppc64: mac99 g3beige prep
* sh4/sh4eb: r2d
* sparc64: sun4u sun4v

Note that ppc64 machine powernv already sets an "ide" default
explicitly.  Its IDE controller isn't implemented, yet.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1487153147-11530-2-git-send-email-armbru@redhat.com>
2017-02-21 13:10:53 +01:00
Peter Maydell
a0775e28cd Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request

v2:
 * Rebased to resolve scsi conflicts

# gpg: Signature made Tue 21 Feb 2017 11:56:24 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request: (24 commits)
  coroutine-lock: make CoRwlock thread-safe and fair
  coroutine-lock: add mutex argument to CoQueue APIs
  coroutine-lock: place CoMutex before CoQueue in header
  test-aio-multithread: add performance comparison with thread-based mutexes
  coroutine-lock: add limited spinning to CoMutex
  coroutine-lock: make CoMutex thread-safe
  block: document fields protected by AioContext lock
  async: remove unnecessary inc/dec pairs
  aio-posix: partially inline aio_dispatch into aio_poll
  block: explicitly acquire aiocontext in aio callbacks that need it
  block: explicitly acquire aiocontext in bottom halves that need it
  block: explicitly acquire aiocontext in callbacks that need it
  block: explicitly acquire aiocontext in timers that need it
  aio: push aio_context_acquire/release down to dispatching
  qed: introduce qed_aio_start_io and qed_aio_next_io_cb
  blkdebug: reschedule coroutine on the AioContext it is running on
  coroutine-lock: reschedule coroutine on the AioContext it was running on
  nbd: convert to use qio_channel_yield
  io: make qio_channel_yield aware of AioContexts
  io: add methods to set I/O handlers on AioContext
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 11:58:03 +00:00
Paolo Bonzini
a7b91d35ba coroutine-lock: make CoRwlock thread-safe and fair
This adds a CoMutex around the existing CoQueue.  Because the write-side
can just take CoMutex, the old "writer" field is not necessary anymore.
Instead of removing it altogether, count the number of pending writers
during a read-side critical section and forbid further readers from
entering.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
1ace7ceac5 coroutine-lock: add mutex argument to CoQueue APIs
All that CoQueue needs in order to become thread-safe is help
from an external mutex.  Add this to the API.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
f8c6e1cbc3 coroutine-lock: place CoMutex before CoQueue in header
This will avoid forward references in the next patch.  It is also
more logical because CoQueue is not anymore the basic primitive.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
c05df34a87 test-aio-multithread: add performance comparison with thread-based mutexes
Add two implementations of the same benchmark as the previous patch,
but using pthreads.  One uses a normal QemuMutex, the other is Linux
only and implements a fair mutex based on MCS locks and futexes.
This shows that the slower performance of the 5-thread case is due to
the fairness of CoMutex, rather than to coroutines.  If fairness does
not matter, as is the case with two threads, CoMutex can actually be
faster than pthreads.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
480cff6322 coroutine-lock: add limited spinning to CoMutex
Running a very small critical section on pthread_mutex_t and CoMutex
shows that pthread_mutex_t is much faster because it doesn't actually
go to sleep.  What happens is that the critical section is shorter
than the latency of entering the kernel and thus FUTEX_WAIT always
fails.  With CoMutex there is no such latency but you still want to
avoid wait and wakeup.  So introduce it artificially.

This only works with one waiters; because CoMutex is fair, it will
always have more waits and wakeups than a pthread_mutex_t.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
fed20a70e3 coroutine-lock: make CoMutex thread-safe
This uses the lock-free mutex described in the paper '"Blocking without
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
Papatriantafilou.  The same technique is used in OSv, and in fact
the code is essentially a conversion to C of OSv's code.

[Added missing coroutine_fn in tests/test-aio-multithread.c.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
91bcea4899 block: document fields protected by AioContext lock
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
bd451435c0 async: remove unnecessary inc/dec pairs
Pull the increment/decrement pair out of aio_bh_poll and into the
callers.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:40 +00:00
Paolo Bonzini
a153bf52b3 aio-posix: partially inline aio_dispatch into aio_poll
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
Extract the dispatching loop for file descriptor handlers into a new
function aio_dispatch_handlers, and then inline aio_dispatch into
aio_poll.

aio_dispatch can now become void.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
b9e413dd37 block: explicitly acquire aiocontext in aio callbacks that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
1919631e6b block: explicitly acquire aiocontext in bottom halves that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:39 +00:00
Paolo Bonzini
9d45665448 block: explicitly acquire aiocontext in callbacks that need it
This covers both file descriptor callbacks and polling callbacks,
since they execute related code.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:39:36 +00:00
Paolo Bonzini
2f47da5f7f block: explicitly acquire aiocontext in timers that need it
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
0836c72f70 aio: push aio_context_acquire/release down to dispatching
The AioContext data structures are now protected by list_lock and/or
they are walked with FOREACH_RCU primitives.  There is no need anymore
to acquire the AioContext for the entire duration of aio_dispatch.
Instead, just acquire it before and after invoking the callbacks.
The next step is then to push it further down.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
b20123a28b qed: introduce qed_aio_start_io and qed_aio_next_io_cb
qed_aio_start_io and qed_aio_next_io will not have to acquire/release
the AioContext, while qed_aio_next_io_cb will.  Split the functionality
and gain a little type-safety in the process.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
e5c67ab552 blkdebug: reschedule coroutine on the AioContext it is running on
Keep the coroutine on the same AioContext.  Without this change,
there would be a race between yielding the coroutine and reentering it.
While the race cannot happen now, because the code only runs from a single
AioContext, this will change with multiqueue support in the block layer.

While doing the change, replace custom bottom half with aio_co_schedule.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
a9d9235567 coroutine-lock: reschedule coroutine on the AioContext it was running on
As a small step towards the introduction of multiqueue, we want
coroutines to remain on the same AioContext that started them,
unless they are moved explicitly with e.g. aio_co_schedule.  This patch
avoids that coroutines switch AioContext when they use a CoMutex.
For now it does not make much of a difference, because the CoMutex
is not thread-safe and the AioContext itself is used to protect the
CoMutex from concurrent access.  However, this is going to change.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
ff82911cd3 nbd: convert to use qio_channel_yield
In the client, read the reply headers from a coroutine, switching the
read side between the "read header" coroutine and the I/O coroutine that
reads the body of the reply.

In the server, if the server can read more requests it will create a new
"read request" coroutine as soon as a request has been read.  Otherwise,
the new coroutine is created in nbd_request_put.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:08 +00:00
Paolo Bonzini
c4c497d27f io: make qio_channel_yield aware of AioContexts
Support separate coroutines for reading and writing, and place the
read/write handlers on the AioContext that the QIOChannel is registered
with.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
bf88c1247f io: add methods to set I/O handlers on AioContext
This is in preparation for making qio_channel_yield work on
AioContexts other than the main one.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
934ebf48c0 test-thread-pool: use generic AioContext infrastructure
Once the thread pool starts using aio_co_wake, it will also need
qemu_get_current_aio_context().  Make test-thread-pool create
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
and tests/iothread.c can provide the rest.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
35f106e684 block-backend: allow blk_prw from coroutine context
qcow2_create2 calls this.  Do not run a nested event loop, as that
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
list of the currently running one.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
0c330a734b aio: introduce aio_co_schedule and aio_co_wake
aio_co_wake provides the infrastructure to start a coroutine on a "home"
AioContext.  It will be used by CoMutex and CoQueue, so that coroutines
don't jump from one context to another when they go to sleep on a
mutex or waitqueue.  However, it can also be used as a more efficient
alternative to one-shot bottom halves, and saves the effort of tracking
which AioContext a coroutine is running on.

aio_co_schedule is the part of aio_co_wake that starts a coroutine
on a remove AioContext, but it is also useful to implement e.g.
bdrv_set_aio_context callbacks.

The implementation of aio_co_schedule is based on a lock-free
multiple-producer, single-consumer queue.  The multiple producers use
cmpxchg to add to a LIFO stack.  The consumer (a per-AioContext bottom
half) grabs all items added so far, inverts the list to make it FIFO,
and goes through it one item at a time until it's empty.  The data
structure was inspired by OSv, which uses it in the very code we'll
"port" to QEMU for the thread-safe CoMutex.

Most of the new code is really tests.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Paolo Bonzini
c2b38b277a block: move AioContext, QEMUTimer, main-loop to libqemuutil
AioContext is fairly self contained, the only dependency is QEMUTimer but
that in turn doesn't need anything else.  So move them out of block-obj-y
to avoid introducing a dependency from io/ to block-obj-y.

main-loop and its dependency iohandler also need to be moved, because
later in this series io/ will call iohandler_get_aio_context.

[Changed copyright "the QEMU team" to "other QEMU contributors" as
suggested by Daniel Berrange and agreed by Paolo.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-02-21 11:14:07 +00:00
Peter Maydell
b856256179 Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20170221-1' into staging
xhci: add qemu-xhci device, some followup cleanups.
ccid: better sanity checking.
ehci: fix memory leak
ohci: bugfixes.

# gpg: Signature made Tue 21 Feb 2017 07:14:35 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-usb-20170221-1:
  usb-ccid: add check message size checks
  usb-ccid: move header size check
  usb-ccid: better bulk_out error handling
  xhci: drop via vendor command handling
  xhci: fix nec vendor quirk handling
  xhci: add qemu xhci controller
  xhci: drop ER_FULL_HACK workaround
  xhci: apply limits to loops
  usb: ohci: limit the number of link eds
  usb: ohci: fix error return code in servicing iso td
  usb: ehci: fix memory leak in ehci

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-21 09:35:15 +00:00
Gerd Hoffmann
31fb4444a4 usb-ccid: add check message size checks
Check message size too when figuring whenever we should expect more data.
Fix debug message to show useful data, p->iov.size is fixed anyway if we
land there, print how much we got meanwhile instead.

Also check announced message size against actual message size.  That
is a more general fix for CVE-2017-5898 than commit "c7dfbf3 usb: ccid:
check ccid apdu length".

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
7569c54642 usb-ccid: move header size check
Move up header size check, so we can use header fields in sanity checks
(in followup patches).  Also reword the debug message.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
0aeebc73b7 usb-ccid: better bulk_out error handling
Add err goto label where we can jump to from all error conditions.
STALL request on all errors.  Reset position on all errors.

Normal request processing is not in a else branch any more, so this code
is reintended, there are no code changes in that part of the code
though.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1487250819-23764-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
558ff1b6ef xhci: drop via vendor command handling
Seems pretty pointless, we don't emulate an via xhci controller.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-5-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
2992d6b49c xhci: fix nec vendor quirk handling
Only the TYPE_NEC_XHCI controller will have the nec vendor quirks.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-4-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
72a810f411 xhci: add qemu xhci controller
Turn existing TYPE_XHCI into an abstract base class.
Create two child classes, TYPE_NEC_XHCI (same name as old xhci
controller) and TYPE_QEMU_XHCI (using an ID from our namespace).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Message-id: 1486382139-30630-3-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
898248a329 xhci: drop ER_FULL_HACK workaround
The nec/renesas driver problems have finally been debugged and root
caused, see commit "7da76e1 xhci: fix event queue IRQ handling".

It's pretty clear now that
 (a) The whole "driver can't handle ring full" story is most likely
     wrong.
 (b) The ER_FULL_HACK workaround based on the false assumtion doesn't
     much.  It avoids the driver crashing (without commit 7da76e1), but
     it doesn't make usb work.
 (c) With 7da76e1 applied it doesn't trigger any more.

So, lets kill it.  Or, to be exact, lets almost kill it.  Some data
fields are kept unused in the state struct, for live migration backward
compatibility.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486382139-30630-2-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Gerd Hoffmann
f89b60f6e5 xhci: apply limits to loops
Limits should be big enough that normal guest should not hit it.
Add a tracepoint to log them, just in case.  Also, while being
at it, log the existing link trb limit too.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486383669-6421-1-git-send-email-kraxel@redhat.com
2017-02-21 08:11:43 +01:00
Li Qiang
95ed56939e usb: ohci: limit the number of link eds
The guest may builds an infinite loop with link eds. This patch
limit the number of linked ed to avoid this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899a02e.45ca240a.6c373.93c1@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
26f670a244 usb: ohci: fix error return code in servicing iso td
It should return 1 if an error occurs when reading iso td.
This will avoid an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899ac3e.1033240a.944d5.9a2d@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Li Qiang
d710e1e7bd usb: ehci: fix memory leak in ehci
In usb_ehci_init function, it initializes 's->ipacket', but there
is no corresponding function to free this. As the ehci can be hotplug
and unplug, this will leak host memory leak. In order to make the
hierarchy clean, we should add a ehci pci finalize function, then call
the clean function in ehci device.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 589a85b8.3c2b9d0a.b8e6.1434@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-21 08:11:42 +01:00
Peter Maydell
56f9e46b84 Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-20' into staging
QAPI patches for 2017-02-20

# gpg: Signature made Mon 20 Feb 2017 13:31:12 GMT
# gpg:                using RSA key 0x3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qapi-2017-02-20:
  Makefile: Put VERSION info into version.texi rather than using -D
  qapi2texi: replace quotation by bold section name

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 17:42:47 +00:00
Peter Maydell
c8f21dbfc3 Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20170220-1' into staging
ui: opengl fixes, for spice and egl-helpers.

# gpg: Signature made Mon 20 Feb 2017 13:12:46 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-ui-20170220-1:
  egl-helpers: Support newer MESA versions
  spice: allow to specify drm rendernode

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 16:31:38 +00:00
Peter Maydell
6753e4ed15 Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20170220-1' into staging
input: add wctablet, ps2 fix

# gpg: Signature made Mon 20 Feb 2017 11:42:12 GMT
# gpg:                using RSA key 0x4CB6D8EED3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"
# Primary key fingerprint: A032 8CFF B93A 17A7 9901  FE7D 4CB6 D8EE D3E8 7138

* remotes/kraxel/tags/pull-input-20170220-1:
  Add wctablet device
  ps2: fix mouse mappings for right/middle button

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 13:38:34 +00:00
Peter Maydell
fea346f569 Makefile: Put VERSION info into version.texi rather than using -D
Unfortunately some older versions of makeinfo don't correctly
handle the -D command line option and fail to set the variable.
This then causes them to complain
 docs/qemu-ga-ref.texi:41: warning: undefined flag: VERSION

Work around this by doing as the autotools do, and writing
the information into a version.texi file which we then
include from the .texi files that need it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <1487357968-31000-1-git-send-email-peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:11:07 +01:00
Marc-André Lureau
1ede77dfd2 qapi2texi: replace quotation by bold section name
When we build qemu-qmp-ref.txt this causes texinfo to complain several
times:
"Negative repeat count does nothing at
/usr/share/texinfo/Texinfo/Convert/Line.pm line 124."

It also doesn't display correctly, because the "Notes" text disappears
entirely in the HTML version because it thinks there's no actual
quotation text.

The text file output formatting is also not good.

To solve those problems, remove usage of @quotation, and simply use bold
face for the section name.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20170217093416.27688-1-marcandre.lureau@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-02-20 14:10:46 +01:00
Peter Maydell
5d42ff913b Merge remote-tracking branch 'remotes/huth/tags/coldfire-20170219' into staging
Updates for the m68k ColdFire machines:
- Remove the obsolete dummy machine
- QOMify the ColdFire interrupt controller
- Volunteer for maintaining the orphan ColdFire boards

# gpg: Signature made Sat 18 Feb 2017 23:08:55 GMT
# gpg:                using RSA key 0x2ED9D774FE702DB5
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>"
# gpg:                 aka "Thomas Huth <thuth@redhat.com>"
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>"
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* remotes/huth/tags/coldfire-20170219:
  MAINTAINERS: Add odd fixer for the ColdFire boards
  hw/m68k: QOMify the ColdFire interrupt controller
  hw/m68k: Remove dummy machine

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 11:55:37 +00:00
Frediano Ziglio
0ea1523fb6 egl-helpers: Support newer MESA versions
According to
https://www.khronos.org/registry/EGL/extensions/MESA/EGL_MESA_platform_gbm.txt
if MESA_platform_gbm is supported display should be initialized
from a GBM handle using eglGetPlatformDisplayEXT.

Signed-off-by: Frediano Ziglio <fziglio@redhat.com>
Message-id: 20170220095055.4234-1-fziglio@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:46:09 +01:00
Marc-André Lureau
7b5255083b spice: allow to specify drm rendernode
When multiple GPU are available, picking the first one isn't always the
best choice. Learn to specify a device rendernode.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170212112118.16044-1-marcandre.lureau@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2017-02-20 12:44:32 +01:00
Peter Maydell
d514cfd763 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci: fixes, features

virtio is using region caches for performance
iommu support for IOTLBs
misc fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 17 Feb 2017 19:53:02 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (23 commits)
  intel_iommu: vtd_slpt_level_shift check level
  intel_iommu: convert dbg macros to trace for trans
  intel_iommu: convert dbg macros to traces for inv
  intel_iommu: renaming gpa to iova where proper
  intel_iommu: simplify irq region translation
  intel_iommu: add "caching-mode" option
  vfio: allow to notify unmap for very large region
  vfio: introduce vfio_get_vaddr()
  vfio: trace map/unmap for notify as well
  pcie: simplify pcie_add_capability()
  virtio: Fix no interrupt when not creating msi controller
  virtio: use VRingMemoryRegionCaches for avail and used rings
  virtio: check for vring setup in virtio_queue_update_used_idx
  virtio: use VRingMemoryRegionCaches for descriptor ring
  virtio: add MemoryListener to cache ring translations
  virtio: use MemoryRegionCache to access descriptors
  exec: make address_space_cache_destroy idempotent
  virtio: use address_space_map/unmap to access descriptors
  virtio: add virtio_*_phys_cached
  memory: make memory_listener_unregister idempotent
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-20 09:53:59 +00:00
Thomas Huth
5baf2741b4 MAINTAINERS: Add odd fixer for the ColdFire boards
I did some work with real ColdFire boards in the past, and after
QOMifying most of the ColdFire devices recently, I feel confident
that I could at least take care of odd fixes for these boards.

Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
88b86983f3 hw/m68k: QOMify the ColdFire interrupt controller
Use type_init() and friends to adapt the ColdFire interrupt
controller to the latest QEMU device conventions.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:31 +01:00
Thomas Huth
22f2dbe7ea hw/m68k: Remove dummy machine
Since it is now possible to instantiate a CPU and RAM with the "none"
machine, too, and a kernel can be loaded there with the generic loader
device, there is no more need for the m68k "dummy" machine. Thus let's
remove this unmaintained file now.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Thomas Huth <huth@tuxfamily.org>
2017-02-18 22:23:25 +01:00
Peter Xu
7e58326ad7 intel_iommu: vtd_slpt_level_shift check level
This helps in debugging incorrect level passed in.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6c441e1d61 intel_iommu: convert dbg macros to trace for trans
Another patch to convert the DPRINTF() stuffs. This patch focuses on the
address translation path and caching.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
bc535e59c4 intel_iommu: convert dbg macros to traces for inv
VT-d codes are still using static DEBUG_INTEL_IOMMU macro. That's not
good, and we should end the day when we need to recompile the code
before getting useful debugging information for vt-d. Time to switch to
the trace system. This is the first patch to do it.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
6e9055641b intel_iommu: renaming gpa to iova where proper
There are lots of places in current intel_iommu.c codes that named
"iova" as "gpa". It is really confusing to use a name "gpa" in these
places (which is very easily to be understood as "Guest Physical
Address", while it's not). To make the codes (much) easier to be read, I
decided to do this once and for all.

No functional change is made. Only literal ones.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
046ab7e9be intel_iommu: simplify irq region translation
Now we have a standalone memory region for MSI, all the irq region
requests should be redirected there. Cleaning up the block with an
assertion instead.

Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Aviv Ben-David
3b40f0e53c intel_iommu: add "caching-mode" option
This capability asks the guest to invalidate cache before each map operation.
We can use this invalidation to trap map operations in the hypervisor.

Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
[peterx: using "caching-mode" instead of "cache-mode" to align with spec]
[peterx: re-write the subject to make it short and clear]
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
dfbd90e5b9 vfio: allow to notify unmap for very large region
Linux vfio driver supports to do VFIO_IOMMU_UNMAP_DMA for a very big
region. This can be leveraged by QEMU IOMMU implementation to cleanup
existing page mappings for an entire iova address space (by notifying
with an IOTLB with extremely huge addr_mask). However current
vfio_iommu_map_notify() does not allow that. It make sure that all the
translated address in IOTLB is falling into RAM range.

The check makes sense, but it should only be a sensible checker for
mapping operations, and mean little for unmap operations.

This patch moves this check into map logic only, so that we'll get
faster unmap handling (no need to translate again), and also we can then
better support unmapping a very big region when it covers non-ram ranges
or even not-existing ranges.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
4a4b88fbe1 vfio: introduce vfio_get_vaddr()
A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
the operation is unmap, but it won't hurt much.

One thing to mention is that we need the RCU read lock to protect the
whole translation and map/unmap procedure.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
3213835720 vfio: trace map/unmap for notify as well
We traces its range, but we don't know whether it's a MAP/UNMAP. Let's
dump it as well.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Peter Xu
d4e9b75aa0 pcie: simplify pcie_add_capability()
When we add PCIe extended capabilities, we should be following the rule
that we add the head extended cap (at offset 0x100) first, then the rest
of them. Meanwhile, we are always adding new capability bits at the end
of the list. Here the "next" looks meaningless in all cases since it
should always be zero (along with the "header").

Simplify the function a bit, and it looks more readable now.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:31 +02:00
Michael S. Tsirkin
b4b9862b53 virtio: Fix no interrupt when not creating msi controller
For ARM virt machine, if we use virt-2.7 which will not create ITS node,
the virtio-net can not recieve interrupts so it can't get ip address
through dhcp.
This fixes commit 83d768b(virtio: set ISR on dataplane notifications).

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
97cd965c07 virtio: use VRingMemoryRegionCaches for avail and used rings
The virtio-net change is necessary because it uses virtqueue_fill
and virtqueue_flush instead of the more convenient virtqueue_push.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
ca0176ad83 virtio: check for vring setup in virtio_queue_update_used_idx
If the vring has not been set up, it is not necessary for vring_used_idx
to do anything (as is already the case when the caller is virtio_load).
This is harmless for now, but it will be a problem when the
MemoryRegionCache has not been set up.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
991976f751 virtio: use VRingMemoryRegionCaches for descriptor ring
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
c611c76417 virtio: add MemoryListener to cache ring translations
The cached translations are RCU-protected to allow efficient use
when processing virtqueues.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
5eba0404b9 virtio: use MemoryRegionCache to access descriptors
For now, the cache is created on every virtqueue_pop.  Later on,
direct descriptors will be able to reuse it.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
91047df38d exec: make address_space_cache_destroy idempotent
Clear cache->mr so that address_space_cache_destroy does nothing
the second time it is called.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
9796d0ac8f virtio: use address_space_map/unmap to access descriptors
This makes little difference, but it makes the code change smaller
for the next patch that introduces MemoryRegionCache.  This is
because map/unmap are similar to MemoryRegionCache init/destroy.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
e6a830d6eb virtio: add virtio_*_phys_cached
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Paolo Bonzini
1d8280c18f memory: make memory_listener_unregister idempotent
Make it easy to unregister a MemoryListener without tracking whether it
had been registered before.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Haozhong Zhang
79c0f397fe docs: add document to explain the usage of vNVDIMM
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Fam Zheng
0793169870 virtio: Report real progress in VQ aio poll handler
In virtio_queue_host_notifier_aio_poll, not all "!virtio_queue_empty()"
cases are making true progress.

Currently the offending one is virtio-scsi event queue, whose handler
does nothing if no event is pending. As a result aio_poll() will spin on
the "non-empty" VQ and take 100% host CPU.

Fix this by reporting actual progress from virtio queue aio handlers.

Reported-by: Ed Swierk <eswierk@skyportsystems.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Tested-by: Ed Swierk <eswierk@skyportsystems.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-02-17 21:52:30 +02:00
Michael S. Tsirkin
4bb571d857 pci/pcie: don't assume cap id 0 is reserved
VFIO actually wants to create a capability with ID == 0.
This is done to make guest drivers skip the given capability.
pcie_add_capability then trips up on this capability
when looking for end of capability list.

To support this use-case, it's easy enough to switch to
e.g. 0xffffffff for these comparisons - we can be sure
it will never match a 16-bit capability ID.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
2017-02-17 21:52:30 +02:00
393 changed files with 13001 additions and 4082 deletions

1
.gitignore vendored
View File

@@ -107,6 +107,7 @@ docs/qemu-ga-ref.info*
docs/qemu-qmp-ref.info*
/qemu-ga-qapi.texi
/qemu-qapi.texi
/version.texi
*.tps
.stgit-*
cscope.*

19
.shippable.yml Normal file
View File

@@ -0,0 +1,19 @@
language: c
env:
matrix:
- IMAGE=debian-armhf-cross
TARGET_LIST=arm-softmmu,arm-linux-user
- IMAGE=debian-arm64-cross
TARGET_LIST=aarch64-softmmu,aarch64-linux-user
build:
pre_ci:
- make docker-image-${IMAGE}
pre_ci_boot:
image_name: qemu
image_tag: ${IMAGE}
pull: false
options: "-e HOME=/root"
ci:
- unset CC
- ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
- make -j2

View File

@@ -116,3 +116,10 @@ if (a == 1) {
Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
Besides, good compilers already warn users when '==' is mis-typed as '=',
even when the constant is on the right.
7. Comment style
We use traditional C-style /* */ comments and avoid // comments.
Rationale: The // form is valid in C99, so this is purely a matter of
consistency of style. The checkpatch script will warn you about this.

View File

@@ -561,20 +561,19 @@ F: hw/lm32/milkymist.c
M68K Machines
-------------
an5206
S: Orphan
M: Thomas Huth <huth@tuxfamily.org>
S: Odd Fixes
F: hw/m68k/an5206.c
F: hw/m68k/mcf5206.c
dummy_m68k
S: Orphan
F: hw/m68k/dummy_m68k.c
mcf5208
S: Orphan
M: Thomas Huth <huth@tuxfamily.org>
S: Odd Fixes
F: hw/m68k/mcf5208.c
F: hw/m68k/mcf_intc.c
F: hw/char/mcf_uart.c
F: hw/net/mcf_fec.c
F: include/hw/m68k/mcf*.h
MicroBlaze Machines
-------------------
@@ -1801,9 +1800,14 @@ F: docs/block-replication.txt
Build and test automation
-------------------------
M: Alex Bennée <alex.bennee@linaro.org>
M: Fam Zheng <famz@redhat.com>
L: qemu-devel@nongnu.org
S: Supported
S: Maintained
F: .travis.yml
F: .shippable.yml
F: tests/docker/
W: https://travis-ci.org/qemu/qemu
W: http://patchew.org/QEMU/
Documentation
-------------
@@ -1812,9 +1816,3 @@ M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes
F: docs/build-system.txt
Docker testing
--------------
Docker based testing framework and cases
M: Fam Zheng <famz@redhat.com>
S: Maintained
F: tests/docker/

View File

@@ -516,7 +516,7 @@ distclean: clean
rm -f qemu-doc.vr qemu-doc.txt
rm -f config.log
rm -f linux-headers/asm
rm -f qemu-ga-qapi.texi qemu-qapi.texi
rm -f qemu-ga-qapi.texi qemu-qapi.texi version.texi
rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -663,21 +663,24 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
# documentation
MAKEINFO=makeinfo
MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
MAKEINFOFLAGS=--no-split --number-sections
TEXIFLAG=$(if $(V),,--quiet)
%.html: %.texi
version.texi: $(SRC_PATH)/VERSION
$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
%.html: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--html $< -o $@,"GEN","$@")
%.info: %.texi
%.info: %.texi version.texi
$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
%.txt: %.texi
%.txt: %.texi version.texi
$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
--plaintext $< -o $@,"GEN","$@")
%.pdf: %.texi
%.pdf: %.texi version.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool

View File

@@ -9,12 +9,8 @@ chardev-obj-y = chardev/
#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = async.o thread-pool.o
block-obj-y += nbd/
block-obj-y += block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o

View File

@@ -28,6 +28,7 @@
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "qemu/cutils.h"
#include "sysemu/replay.h"
#define AUDIO_CAP "audio"
#include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
timer_mod (s->ts,
timer_mod_anticipate_ns(s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
}
else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)
prev_rpos = hw->rpos;
played = hw->pcm_ops->run_out (hw, live);
replay_audio_out(&played);
if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)
while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
SWVoiceIn *sw;
int captured, min;
int captured = 0, min;
captured = hw->pcm_ops->run_in (hw);
if (replay_mode != REPLAY_MODE_PLAY) {
captured = hw->pcm_ops->run_in(hw);
}
replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
min = audio_pcm_hw_find_min_in (hw);
hw->total_samples_captured += captured - min;

View File

@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right);
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right);
#endif /* QEMU_AUDIO_H */

View File

@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "audio.h"
#define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
}
};
void audio_sample_to_uint64(void *samples, int pos,
uint64_t *left, uint64_t *right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
*left = sample->l;
*right = sample->r;
#endif
}
void audio_sample_from_uint64(void *samples, int pos,
uint64_t left, uint64_t right)
{
struct st_sample *sample = samples;
sample += pos;
#ifdef FLOAT_MIXENG
error_report(
"Coreaudio and floating point samples are not supported by replay yet");
abort();
#else
sample->l = left;
sample->r = right;
#endif
}
/*
* August 21, 1998
* Copyright 1998 Fabrice Bellard.

View File

@@ -38,10 +38,14 @@
#define AUDIO_CAP "sdl"
#include "audio_int.h"
#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
typedef struct SDLVoiceOut {
HWVoiceOut hw;
int live;
#if USE_SEMAPHORE
int rpos;
#endif
int decr;
} SDLVoiceOut;
@@ -53,8 +57,10 @@ static struct {
static struct SDLAudioState {
int exit;
#if USE_SEMAPHORE
SDL_mutex *mutex;
SDL_sem *sem;
#endif
int initialized;
bool driver_created;
} glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)
static int sdl_lock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_LockMutex (s->mutex)) {
sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_LockAudio();
#endif
return 0;
}
static int sdl_unlock (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_UnlockMutex (s->mutex)) {
sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
return -1;
}
#else
SDL_UnlockAudio();
#endif
return 0;
}
static int sdl_post (SDLAudioState *s, const char *forfn)
{
#if USE_SEMAPHORE
if (SDL_SemPost (s->sem)) {
sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
return -1;
}
#endif
return 0;
}
#if USE_SEMAPHORE
static int sdl_wait (SDLAudioState *s, const char *forfn)
{
if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
}
return 0;
}
#endif
static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
{
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
int to_mix, decr;
/* dolog ("in callback samples=%d\n", samples); */
#if USE_SEMAPHORE
sdl_wait (s, "sdl_callback");
if (s->exit) {
return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
if (!sdl->live) {
goto again;
}
#else
if (s->exit || !sdl->live) {
break;
}
#endif
/* dolog ("in callback live=%d\n", live); */
to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
/* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
hw->clip (buf, src, chunk);
#if USE_SEMAPHORE
sdl->rpos = (sdl->rpos + chunk) % hw->samples;
#else
hw->rpos = (hw->rpos + chunk) % hw->samples;
#endif
to_mix -= chunk;
buf += chunk << hw->info.shift;
}
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
sdl->live -= decr;
sdl->decr += decr;
#if USE_SEMAPHORE
again:
if (sdl_unlock (s, "sdl_callback")) {
return;
}
#endif
}
/* dolog ("done len=%d\n", len); */
#if (SDL_MAJOR_VERSION >= 2)
/* SDL2 does not clear the remaining buffer for us, so do it on our own */
if (samples) {
memset(buf, 0, samples << hw->info.shift);
}
#endif
}
static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
decr = audio_MIN (sdl->decr, live);
sdl->decr -= decr;
#if USE_SEMAPHORE
sdl->live = live - decr;
hw->rpos = sdl->rpos;
#else
sdl->live = live;
#endif
if (sdl->live > 0) {
sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
return NULL;
}
#if USE_SEMAPHORE
s->mutex = SDL_CreateMutex ();
if (!s->mutex) {
sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
SDL_QuitSubSystem (SDL_INIT_AUDIO);
return NULL;
}
#endif
s->driver_created = true;
return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
{
SDLAudioState *s = opaque;
sdl_close (s);
#if USE_SEMAPHORE
SDL_DestroySemaphore (s->sem);
SDL_DestroyMutex (s->mutex);
#endif
SDL_QuitSubSystem (SDL_INIT_AUDIO);
s->driver_created = false;
}

269
block.c
View File

@@ -588,21 +588,20 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
return drv;
}
static int find_image_format(BdrvChild *file, const char *filename,
static int find_image_format(BlockBackend *file, const char *filename,
BlockDriver **pdrv, Error **errp)
{
BlockDriverState *bs = file->bs;
BlockDriver *drv;
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
*pdrv = &bdrv_raw;
return ret;
}
ret = bdrv_pread(file, 0, buf, sizeof(buf));
ret = blk_pread(file, 0, buf, sizeof(buf));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read image for determining its "
"format");
@@ -926,6 +925,95 @@ out:
g_free(gen_node_name);
}
static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
const char *node_name, QDict *options,
int open_flags, Error **errp)
{
Error *local_err = NULL;
int ret;
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
}
bs->drv = drv;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
bs->opaque = g_malloc0(drv->instance_size);
if (drv->bdrv_file_open) {
assert(!drv->bdrv_needs_filename || bs->filename[0]);
ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
} else if (drv->bdrv_open) {
ret = drv->bdrv_open(bs, options, open_flags, &local_err);
} else {
ret = 0;
}
if (ret < 0) {
if (local_err) {
error_propagate(errp, local_err);
} else if (bs->filename[0]) {
error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
} else {
error_setg_errno(errp, -ret, "Could not open image");
}
goto free_and_fail;
}
ret = refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
goto free_and_fail;
}
bdrv_refresh_limits(bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto free_and_fail;
}
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->bl.request_alignment));
return 0;
free_and_fail:
/* FIXME Close bs first if already opened*/
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
return ret;
}
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp)
{
BlockDriverState *bs;
int ret;
bs = bdrv_new();
bs->open_flags = flags;
bs->explicit_options = qdict_new();
bs->options = qdict_new();
bs->opaque = NULL;
update_options_from_flags(bs->options, flags);
ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
if (ret < 0) {
QDECREF(bs->explicit_options);
QDECREF(bs->options);
bdrv_unref(bs);
return NULL;
}
return bs;
}
QemuOptsList bdrv_runtime_opts = {
.name = "bdrv_common",
.head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
@@ -974,7 +1062,7 @@ QemuOptsList bdrv_runtime_opts = {
*
* Removes all processed options from *options.
*/
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
QDict *options, Error **errp)
{
int ret, open_flags;
@@ -1005,7 +1093,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
assert(drv != NULL);
if (file != NULL) {
filename = file->bs->filename;
filename = blk_bs(file)->filename;
} else {
filename = qdict_get_try_str(options, "filename");
}
@@ -1020,14 +1108,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
drv->format_name);
node_name = qemu_opt_get(opts, "node-name");
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail_opts;
}
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
@@ -1093,62 +1173,19 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
}
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
bs->drv = drv;
bs->opaque = g_malloc0(drv->instance_size);
/* Open the image, either directly or using a protocol */
open_flags = bdrv_open_flags(bs, bs->open_flags);
if (drv->bdrv_file_open) {
assert(file == NULL);
assert(!drv->bdrv_needs_filename || filename != NULL);
ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
} else {
if (file == NULL) {
error_setg(errp, "Can't use '%s' as a block driver for the "
"protocol level", drv->format_name);
ret = -EINVAL;
goto free_and_fail;
}
bs->file = file;
ret = drv->bdrv_open(bs, options, open_flags, &local_err);
}
node_name = qemu_opt_get(opts, "node-name");
assert(!drv->bdrv_file_open || file == NULL);
ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
if (ret < 0) {
if (local_err) {
error_propagate(errp, local_err);
} else if (bs->filename[0]) {
error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
} else {
error_setg_errno(errp, -ret, "Could not open image");
}
goto free_and_fail;
goto fail_opts;
}
ret = refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
goto free_and_fail;
}
bdrv_refresh_limits(bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto free_and_fail;
}
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->bl.request_alignment));
qemu_opts_del(opts);
return 0;
free_and_fail:
bs->file = NULL;
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
fail_opts:
qemu_opts_del(opts);
return ret;
@@ -1169,13 +1206,13 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
return NULL;
}
if (qobject_type(options_obj) != QTYPE_QDICT) {
options = qobject_to_qdict(options_obj);
if (!options) {
qobject_decref(options_obj);
error_setg(errp, "Invalid JSON object given");
return NULL;
}
options = qobject_to_qdict(options_obj);
qdict_flatten(options);
return options;
@@ -1368,7 +1405,18 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
}
if (child->bs->inherits_from == parent) {
child->bs->inherits_from = NULL;
BdrvChild *c;
/* Remove inherits_from only when the last reference between parent and
* child->bs goes away. */
QLIST_FOREACH(c, &parent->children, next) {
if (c != child && c->bs == child->bs) {
break;
}
}
if (c == NULL) {
child->bs->inherits_from = NULL;
}
}
bdrv_root_unref_child(child);
@@ -1543,28 +1591,12 @@ free_exit:
return ret;
}
/*
* Opens a disk image whose options are given as BlockdevRef in another block
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
* BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
* itself, all options starting with "${bdref_key}." are considered part of the
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
static BlockDriverState *
bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
BlockDriverState *parent, const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BdrvChild *c = NULL;
BlockDriverState *bs;
BlockDriverState *bs = NULL;
QDict *image_options;
char *bdref_key_dot;
const char *reference;
@@ -1591,11 +1623,40 @@ BdrvChild *bdrv_open_child(const char *filename,
goto done;
}
c = bdrv_attach_child(parent, bs, bdref_key, child_role);
done:
qdict_del(options, bdref_key);
return c;
return bs;
}
/*
* Opens a disk image whose options are given as BlockdevRef in another block
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
* BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
* itself, all options starting with "${bdref_key}." are considered part of the
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState *parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
allow_none, errp);
if (bs == NULL) {
return NULL;
}
return bdrv_attach_child(parent, bs, bdref_key, child_role);
}
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1691,7 +1752,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
Error **errp)
{
int ret;
BdrvChild *file = NULL;
BlockBackend *file = NULL;
BlockDriverState *bs;
BlockDriver *drv = NULL;
const char *drvname;
@@ -1789,13 +1850,25 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
qdict_del(options, "backing");
}
/* Open image file without format layer */
/* Open image file without format layer. This BlockBackend is only used for
* probing, the block drivers will do their own bdrv_open_child() for the
* same BDS, which is why we put the node name back into options. */
if ((flags & BDRV_O_PROTOCOL) == 0) {
file = bdrv_open_child(filename, options, "file", bs,
&child_file, true, &local_err);
BlockDriverState *file_bs;
file_bs = bdrv_open_child_bs(filename, options, "file", bs,
&child_file, true, &local_err);
if (local_err) {
goto fail;
}
if (file_bs != NULL) {
file = blk_new();
blk_insert_bs(file, file_bs);
bdrv_unref(file_bs);
qdict_put(options, "file",
qstring_from_str(bdrv_get_node_name(file_bs)));
}
}
/* Image format probing */
@@ -1835,8 +1908,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file && (bs->file != file)) {
bdrv_unref_child(bs, file);
if (file) {
blk_unref(file);
file = NULL;
}
@@ -1898,8 +1971,9 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
return bs;
fail:
if (file != NULL) {
bdrv_unref_child(bs, file);
blk_unref(file);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
}
QDECREF(snapshot_options);
QDECREF(bs->explicit_options);
@@ -2626,8 +2700,9 @@ exit:
/**
* Truncate file to 'offset' bytes (needed only for file protocols)
*/
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
int bdrv_truncate(BdrvChild *child, int64_t offset)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
if (!drv)

View File

@@ -64,7 +64,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
retry = true;
break;
}

View File

@@ -405,12 +405,6 @@ out:
return ret;
}
static void error_callback_bh(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
{
BDRVBlkdebugState *s = bs->opaque;
@@ -423,8 +417,7 @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
}
if (!immediately) {
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
qemu_coroutine_self());
aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
qemu_coroutine_yield();
}
@@ -670,7 +663,7 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)

View File

@@ -60,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
static void blkreplay_bh_cb(void *opaque)
{
Request *req = opaque;
qemu_coroutine_enter(req->co);
aio_co_wake(req->co);
qemu_bh_delete(req->bh);
g_free(req);
}

View File

@@ -880,7 +880,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
{
QEMUIOVector qiov;
struct iovec iov;
Coroutine *co;
BlkRwCo rwco;
iov = (struct iovec) {
@@ -897,9 +896,14 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
} else {
Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
qemu_coroutine_enter(co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
return rwco.ret;
}
@@ -979,7 +983,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
static void blk_aio_complete_bh(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
assert(acb->has_returned);
blk_aio_complete(acb);
}
@@ -1602,7 +1605,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return -ENOMEDIUM;
}
return bdrv_truncate(blk_bs(blk), offset);
return bdrv_truncate(blk->root, offset);
}
static void blk_pdiscard_entry(void *opaque)

View File

@@ -104,6 +104,12 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
struct bochs_header bochs;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->read_only = true; /* no write support yet */
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));

View File

@@ -66,6 +66,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->read_only = true;
/* read header */

View File

@@ -300,6 +300,12 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
QCryptoBlockOpenOptions *open_opts = NULL;
unsigned int cflags = 0;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -383,7 +389,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
offset += payload_offset;
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static void block_crypto_close(BlockDriverState *bs)

View File

@@ -135,6 +135,7 @@ typedef struct BDRVCURLState {
char *cookie;
bool accept_range;
AioContext *aio_context;
QemuMutex mutex;
char *username;
char *password;
char *proxyusername;
@@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
return FIND_RET_NONE;
}
/* Called with s->mutex held. */
static void curl_multi_check_completion(BDRVCURLState *s)
{
int msgs_in_queue;
@@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
continue;
}
qemu_mutex_unlock(&s->mutex);
acb->common.cb(acb->common.opaque, -EPROTO);
qemu_mutex_lock(&s->mutex);
qemu_aio_unref(acb);
state->acb[i] = NULL;
}
@@ -386,9 +390,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
}
}
static void curl_multi_do(void *arg)
/* Called with s->mutex held. */
static void curl_multi_do_locked(CURLState *s)
{
CURLState *s = (CURLState *)arg;
CURLSocket *socket, *next_socket;
int running;
int r;
@@ -406,12 +410,23 @@ static void curl_multi_do(void *arg)
}
}
static void curl_multi_do(void *arg)
{
CURLState *s = (CURLState *)arg;
qemu_mutex_lock(&s->s->mutex);
curl_multi_do_locked(s);
qemu_mutex_unlock(&s->s->mutex);
}
static void curl_multi_read(void *arg)
{
CURLState *s = (CURLState *)arg;
curl_multi_do(arg);
qemu_mutex_lock(&s->s->mutex);
curl_multi_do_locked(s);
curl_multi_check_completion(s->s);
qemu_mutex_unlock(&s->s->mutex);
}
static void curl_multi_timeout_do(void *arg)
@@ -424,9 +439,11 @@ static void curl_multi_timeout_do(void *arg)
return;
}
qemu_mutex_lock(&s->mutex);
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_check_completion(s);
qemu_mutex_unlock(&s->mutex);
#else
abort();
#endif
@@ -759,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
curl_easy_cleanup(state->curl);
state->curl = NULL;
qemu_mutex_init(&s->mutex);
curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
qemu_opts_del(opts);
@@ -784,13 +802,17 @@ static void curl_readv_bh_cb(void *p)
{
CURLState *state;
int running;
int ret = -EINPROGRESS;
CURLAIOCB *acb = p;
BDRVCURLState *s = acb->common.bs->opaque;
BlockDriverState *bs = acb->common.bs;
BDRVCURLState *s = bs->opaque;
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
size_t end;
qemu_mutex_lock(&s->mutex);
// In case we have the requested data already (e.g. read-ahead),
// we can just call the callback and be done.
switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -798,7 +820,7 @@ static void curl_readv_bh_cb(void *p)
qemu_aio_unref(acb);
// fall through
case FIND_RET_WAIT:
return;
goto out;
default:
break;
}
@@ -806,9 +828,8 @@ static void curl_readv_bh_cb(void *p)
// No cache found, so let's start a new request
state = curl_init_state(acb->common.bs, s);
if (!state) {
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_unref(acb);
return;
ret = -EIO;
goto out;
}
acb->start = 0;
@@ -822,9 +843,8 @@ static void curl_readv_bh_cb(void *p)
state->orig_buf = g_try_malloc(state->buf_len);
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->common.cb(acb->common.opaque, -ENOMEM);
qemu_aio_unref(acb);
return;
ret = -ENOMEM;
goto out;
}
state->acb[0] = acb;
@@ -837,6 +857,13 @@ static void curl_readv_bh_cb(void *p)
/* Tell curl it needs to kick things off */
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
out:
qemu_mutex_unlock(&s->mutex);
if (ret != -EINPROGRESS) {
acb->common.cb(acb->common.opaque, ret);
qemu_aio_unref(acb);
}
}
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
@@ -861,6 +888,7 @@ static void curl_close(BlockDriverState *bs)
DPRINTF("CURL: Close\n");
curl_detach_aio_context(bs);
qemu_mutex_destroy(&s->mutex);
g_free(s->cookie);
g_free(s->url);

View File

@@ -413,6 +413,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
int64_t offset;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
block_module_load_one("dmg-bz2");
bs->read_only = true;

View File

@@ -1591,18 +1591,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
}
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
switch (prealloc) {
#ifdef CONFIG_POSIX_FALLOCATE
case PREALLOC_MODE_FALLOC:
/* posix_fallocate() doesn't set errno. */
/*
* Truncating before posix_fallocate() makes it about twice slower on
* file systems that do not support fallocate(), trying to check if a
* block is allocated before allocating it, so don't do that here.
*/
result = -posix_fallocate(fd, 0, total_size);
if (result != 0) {
/* posix_fallocate() doesn't set errno. */
error_setg_errno(errp, -result,
"Could not preallocate data for the new file");
}
@@ -1610,6 +1609,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
#endif
case PREALLOC_MODE_FULL:
{
/*
* Knowing the final size from the beginning could allow the file
* system driver to do less allocations and possibly avoid
* fragmentation of the file.
*/
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
goto out_close;
}
int64_t num = 0, left = total_size;
buf = g_malloc0(65536);
@@ -1636,6 +1646,10 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
break;
}
case PREALLOC_MODE_OFF:
if (ftruncate(fd, total_size) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
break;
default:
result = -EINVAL;

View File

@@ -698,13 +698,6 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
return qemu_gluster_glfs_init(gconf, errp);
}
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_coroutine_enter(acb->coroutine);
}
/*
* AIO callback routine called from GlusterFS thread.
*/
@@ -720,7 +713,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
acb->ret = -EIO; /* Partial read/write - fail it */
}
aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
aio_co_schedule(acb->aio_context, acb->coroutine);
}
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)

View File

@@ -189,7 +189,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_dec_in_flight(bs);
bdrv_drained_begin(bs);
data->done = true;
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -539,7 +539,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue);
qemu_co_queue_wait(&req->wait_queue, NULL);
self->waiting_for = NULL;
retry = true;
waited = true;
@@ -813,7 +813,7 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
CoroutineIOCompletion *co = opaque;
co->ret = ret;
qemu_coroutine_enter(co->coroutine);
aio_co_wake(co->coroutine);
}
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
@@ -2080,6 +2080,11 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
if (acb->aiocb_info->get_aio_context) {
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
} else if (acb->bs) {
/* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
* assert that we're not using an I/O thread. Thread-safe
* code should use bdrv_aio_cancel_async exclusively.
*/
assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
aio_poll(bdrv_get_aio_context(acb->bs), true);
} else {
abort();
@@ -2239,35 +2244,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
return &acb->common;
}
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
BlockAIOCB *acb;
acb = g_malloc(aiocb_info->aiocb_size);
acb->aiocb_info = aiocb_info;
acb->bs = bs;
acb->cb = cb;
acb->opaque = opaque;
acb->refcnt = 1;
return acb;
}
void qemu_aio_ref(void *p)
{
BlockAIOCB *acb = p;
acb->refcnt++;
}
void qemu_aio_unref(void *p)
{
BlockAIOCB *acb = p;
assert(acb->refcnt > 0);
if (--acb->refcnt == 0) {
g_free(acb);
}
}
/**************************************************************/
/* Coroutine block device emulation */
@@ -2299,7 +2275,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
qemu_co_queue_wait(&bs->flush_queue);
qemu_co_queue_wait(&bs->flush_queue, NULL);
}
bs->active_flush_req = true;

View File

@@ -58,6 +58,7 @@ typedef struct IscsiLun {
int events;
QEMUTimer *nop_timer;
QEMUTimer *event_timer;
QemuMutex mutex;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
@@ -165,8 +166,9 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
static void iscsi_co_generic_bh_cb(void *opaque)
{
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -174,7 +176,7 @@ static void iscsi_retry_timer_expired(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
qemu_coroutine_enter(iTask->co);
aio_co_wake(iTask->co);
}
}
@@ -251,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
return ret;
}
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -351,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = {
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
/* Called with QemuMutex held. */
static void
iscsi_set_events(IscsiLun *iscsilun)
{
@@ -394,8 +398,10 @@ iscsi_process_read(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_mutex_lock(&iscsilun->mutex);
iscsi_service(iscsi, POLLIN);
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
}
static void
@@ -404,8 +410,10 @@ iscsi_process_write(void *arg)
IscsiLun *iscsilun = arg;
struct iscsi_context *iscsi = iscsilun->iscsi;
qemu_mutex_lock(&iscsilun->mutex);
iscsi_service(iscsi, POLLOUT);
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
}
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -584,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
uint64_t lba;
uint32_t num_sectors;
bool fua = flags & BDRV_REQ_FUA;
int r = 0;
if (fua) {
assert(iscsilun->dpofua);
@@ -599,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
@@ -635,7 +645,9 @@ retry:
#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -650,12 +662,15 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
@@ -688,18 +703,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
goto out;
}
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
sector_qemu2lun(sector_num, iscsilun),
8 + 16, iscsi_co_generic_cb,
&iTask) == NULL) {
ret = -ENOMEM;
goto out;
goto out_unlock;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.do_retry) {
@@ -716,20 +734,20 @@ retry:
* because the device is busy or the cmd is not
* supported) we pretend all blocks are allocated
* for backwards compatibility */
goto out;
goto out_unlock;
}
lbas = scsi_datain_unmarshall(iTask.task);
if (lbas == NULL) {
ret = -EIO;
goto out;
goto out_unlock;
}
lbasd = &lbas->descriptors[0];
if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
ret = -EIO;
goto out;
goto out_unlock;
}
*pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -751,6 +769,8 @@ retry:
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
out:
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
@@ -813,6 +833,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsilun->use_16_for_rw) {
#if LIBISCSI_API_VERSION >= (20160603)
@@ -850,7 +871,9 @@ retry:
#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -862,6 +885,7 @@ retry:
iTask.complete = 0;
goto retry;
}
qemu_mutex_unlock(&iscsilun->mutex);
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
@@ -876,6 +900,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
struct IscsiTask iTask;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -884,7 +909,9 @@ retry:
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -896,6 +923,7 @@ retry:
iTask.complete = 0;
goto retry;
}
qemu_mutex_unlock(&iscsilun->mutex);
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
@@ -905,6 +933,7 @@ retry:
}
#ifdef __linux__
/* Called (via iscsi_service) with QemuMutex held. */
static void
iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -1029,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb->task->expxferlen = acb->ioh->dxfer_len;
data.size = 0;
qemu_mutex_lock(&iscsilun->mutex);
if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
if (acb->ioh->iovec_count == 0) {
data.data = acb->ioh->dxferp;
@@ -1044,6 +1074,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
iscsi_aio_ioctl_cb,
(data.size > 0) ? &data : NULL,
acb) != 0) {
qemu_mutex_unlock(&iscsilun->mutex);
scsi_free_scsi_task(acb->task);
qemu_aio_unref(acb);
return NULL;
@@ -1063,6 +1094,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
}
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
return &acb->common;
}
@@ -1087,6 +1119,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
@@ -1101,15 +1134,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
qemu_mutex_lock(&iscsilun->mutex);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
r = -ENOMEM;
goto out_unlock;
}
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.task != NULL) {
@@ -1126,17 +1163,20 @@ retry:
/* the target might fail with a check condition if it
is not happy with the alignment of the UNMAP request
we silently fail in this case */
return 0;
goto out_unlock;
}
if (iTask.status != SCSI_STATUS_GOOD) {
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
static int
@@ -1148,6 +1188,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
uint64_t lba;
uint32_t nb_blocks;
bool use_16_for_ws = iscsilun->use_16_for_rw;
int r = 0;
if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
return -ENOTSUP;
@@ -1181,6 +1222,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
}
}
qemu_mutex_lock(&iscsilun->mutex);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (use_16_for_ws) {
@@ -1200,7 +1242,9 @@ retry:
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_mutex_unlock(&iscsilun->mutex);
qemu_coroutine_yield();
qemu_mutex_lock(&iscsilun->mutex);
}
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1210,7 +1254,8 @@ retry:
/* WRITE SAME is not supported by the target */
iscsilun->has_write_same = false;
scsi_free_scsi_task(iTask.task);
return -ENOTSUP;
r = -ENOTSUP;
goto out_unlock;
}
if (iTask.task != NULL) {
@@ -1226,7 +1271,8 @@ retry:
if (iTask.status != SCSI_STATUS_GOOD) {
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
count >> BDRV_SECTOR_BITS);
return iTask.err_code;
r = iTask.err_code;
goto out_unlock;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1237,32 +1283,19 @@ retry:
count >> BDRV_SECTOR_BITS);
}
return 0;
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
}
static void parse_chap(struct iscsi_context *iscsi, const char *target,
static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
const char *secretid;
char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (!user) {
return;
@@ -1293,64 +1326,36 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
Error **errp)
{
QemuOptsList *list;
QemuOpts *opts;
const char *digest = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
digest = qemu_opt_get(opts, "header-digest");
if (!digest) {
return;
}
if (!strcmp(digest, "CRC32C")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else if (!strcmp(digest, "crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
} else if (!strcmp(digest, "NONE")) {
} else if (!strcmp(digest, "none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
} else if (!strcmp(digest, "CRC32C-NONE")) {
} else if (!strcmp(digest, "crc32c-none")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
} else if (!strcmp(digest, "NONE-CRC32C")) {
} else if (!strcmp(digest, "none-crc32c")) {
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
} else {
error_setg(errp, "Invalid header-digest setting : %s", digest);
}
}
static char *parse_initiator_name(const char *target)
static char *get_initiator_name(QemuOpts *opts)
{
QemuOptsList *list;
QemuOpts *opts;
const char *name;
char *iscsi_name;
UuidInfo *uuid_info;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
}
name = qemu_opt_get(opts, "initiator-name");
if (name) {
return g_strdup(name);
}
uuid_info = qmp_query_uuid(NULL);
@@ -1365,43 +1370,24 @@ static char *parse_initiator_name(const char *target)
return iscsi_name;
}
static int parse_timeout(const char *target)
{
QemuOptsList *list;
QemuOpts *opts;
const char *timeout;
list = qemu_find_opts("iscsi");
if (list) {
opts = qemu_opts_find(list, target);
if (!opts) {
opts = QTAILQ_FIRST(&list->head);
}
if (opts) {
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
return atoi(timeout);
}
}
}
return 0;
}
static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
qemu_mutex_lock(&iscsilun->mutex);
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
iscsilun->request_timed_out = true;
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
return;
goto out;
}
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
out:
qemu_mutex_unlock(&iscsilun->mutex);
}
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1474,20 +1460,6 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
}
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the iscsi image",
},
{ /* end of list */ }
},
};
static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
int evpd, int pc, void **inq, Error **errp)
{
@@ -1605,24 +1577,178 @@ out:
}
}
static void iscsi_parse_iscsi_option(const char *target, QDict *options)
{
QemuOptsList *list;
QemuOpts *opts;
const char *user, *password, *password_secret, *initiator_name,
*header_digest, *timeout;
list = qemu_find_opts("iscsi");
if (!list) {
return;
}
opts = qemu_opts_find(list, target);
if (opts == NULL) {
opts = QTAILQ_FIRST(&list->head);
if (!opts) {
return;
}
}
user = qemu_opt_get(opts, "user");
if (user) {
qdict_set_default_str(options, "user", user);
}
password = qemu_opt_get(opts, "password");
if (password) {
qdict_set_default_str(options, "password", password);
}
password_secret = qemu_opt_get(opts, "password-secret");
if (password_secret) {
qdict_set_default_str(options, "password-secret", password_secret);
}
initiator_name = qemu_opt_get(opts, "initiator-name");
if (initiator_name) {
qdict_set_default_str(options, "initiator-name", initiator_name);
}
header_digest = qemu_opt_get(opts, "header-digest");
if (header_digest) {
/* -iscsi takes upper case values, but QAPI only supports lower case
* enum constant names, so we have to convert here. */
char *qapi_value = g_ascii_strdown(header_digest, -1);
qdict_set_default_str(options, "header-digest", qapi_value);
g_free(qapi_value);
}
timeout = qemu_opt_get(opts, "timeout");
if (timeout) {
qdict_set_default_str(options, "timeout", timeout);
}
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static void iscsi_parse_filename(const char *filename, QDict *options,
Error **errp)
{
struct iscsi_url *iscsi_url;
const char *transport_name;
char *lun_str;
iscsi_url = iscsi_parse_full_url(NULL, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
return;
}
#if LIBISCSI_API_VERSION >= (20160603)
switch (iscsi_url->transport) {
case TCP_TRANSPORT:
transport_name = "tcp";
break;
case ISER_TRANSPORT:
transport_name = "iser";
break;
default:
error_setg(errp, "Unknown transport type (%d)",
iscsi_url->transport);
return;
}
#else
transport_name = "tcp";
#endif
qdict_set_default_str(options, "transport", transport_name);
qdict_set_default_str(options, "portal", iscsi_url->portal);
qdict_set_default_str(options, "target", iscsi_url->target);
lun_str = g_strdup_printf("%d", iscsi_url->lun);
qdict_set_default_str(options, "lun", lun_str);
g_free(lun_str);
/* User/password from -iscsi take precedence over those from the URL */
iscsi_parse_iscsi_option(iscsi_url->target, options);
if (iscsi_url->user[0] != '\0') {
qdict_set_default_str(options, "user", iscsi_url->user);
qdict_set_default_str(options, "password", iscsi_url->passwd);
}
iscsi_destroy_url(iscsi_url);
}
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "transport",
.type = QEMU_OPT_STRING,
},
{
.name = "portal",
.type = QEMU_OPT_STRING,
},
{
.name = "target",
.type = QEMU_OPT_STRING,
},
{
.name = "user",
.type = QEMU_OPT_STRING,
},
{
.name = "password",
.type = QEMU_OPT_STRING,
},
{
.name = "password-secret",
.type = QEMU_OPT_STRING,
},
{
.name = "lun",
.type = QEMU_OPT_NUMBER,
},
{
.name = "initiator-name",
.type = QEMU_OPT_STRING,
},
{
.name = "header-digest",
.type = QEMU_OPT_STRING,
},
{
.name = "timeout",
.type = QEMU_OPT_NUMBER,
},
{ /* end of list */ }
},
};
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct scsi_task *task = NULL;
struct scsi_inquiry_standard *inq = NULL;
struct scsi_inquiry_supported_pages *inq_vpd;
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int i, ret = 0, timeout = 0;
const char *transport_name, *portal, *target;
#if LIBISCSI_API_VERSION >= (20160603)
enum iscsi_transport_type transport;
#endif
int i, ret = 0, timeout = 0, lun;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1632,18 +1758,34 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
filename = qemu_opt_get(opts, "filename");
transport_name = qemu_opt_get(opts, "transport");
portal = qemu_opt_get(opts, "portal");
target = qemu_opt_get(opts, "target");
lun = qemu_opt_get_number(opts, "lun", 0);
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_setg(errp, "Failed to parse URL : %s", filename);
if (!transport_name || !portal || !target) {
error_setg(errp, "Need all of transport, portal and target options");
ret = -EINVAL;
goto out;
}
if (!strcmp(transport_name, "tcp")) {
#if LIBISCSI_API_VERSION >= (20160603)
transport = TCP_TRANSPORT;
} else if (!strcmp(transport_name, "iser")) {
transport = ISER_TRANSPORT;
#else
/* TCP is what older libiscsi versions always use */
#endif
} else {
error_setg(errp, "Unknown transport: %s", transport_name);
ret = -EINVAL;
goto out;
}
memset(iscsilun, 0, sizeof(IscsiLun));
initiator_name = parse_initiator_name(iscsi_url->target);
initiator_name = get_initiator_name(opts);
iscsi = iscsi_create_context(initiator_name);
if (iscsi == NULL) {
@@ -1652,30 +1794,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
#if LIBISCSI_API_VERSION >= (20160603)
if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
if (iscsi_init_transport(iscsi, transport)) {
error_setg(errp, ("Error initializing transport."));
ret = -EINVAL;
goto out;
}
#endif
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
if (iscsi_set_targetname(iscsi, target)) {
error_setg(errp, "iSCSI: Failed to set target name.");
ret = -EINVAL;
goto out;
}
if (iscsi_url->user[0] != '\0') {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
error_setg(errp, "Failed to set initiator username and password");
ret = -EINVAL;
goto out;
}
}
/* check if we got CHAP username/password via the options */
parse_chap(iscsi, iscsi_url->target, &local_err);
apply_chap(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1688,10 +1820,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
/* check if we got HEADER_DIGEST via the options */
parse_header_digest(iscsi, iscsi_url->target, &local_err);
apply_header_digest(iscsi, opts, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -1699,7 +1829,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
/* timeout handling is broken in libiscsi before 1.15.0 */
timeout = parse_timeout(iscsi_url->target);
timeout = qemu_opt_get_number(opts, "timeout", 0);
#if LIBISCSI_API_VERSION >= 20150621
iscsi_set_timeout(iscsi, timeout);
#else
@@ -1708,7 +1838,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
#endif
if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
ret = -EINVAL;
@@ -1717,7 +1847,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->iscsi = iscsi;
iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = iscsi_url->lun;
iscsilun->lun = lun;
iscsilun->has_write_same = true;
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -1803,6 +1933,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
scsi_free_scsi_task(task);
task = NULL;
qemu_mutex_init(&iscsilun->mutex);
iscsi_attach_aio_context(bs, iscsilun->aio_context);
/* Guess the internal cluster (page) size of the iscsi target by the means
@@ -1820,9 +1951,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
out:
qemu_opts_del(opts);
g_free(initiator_name);
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
if (task != NULL) {
scsi_free_scsi_task(task);
}
@@ -1851,6 +1979,7 @@ static void iscsi_close(BlockDriverState *bs)
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
iscsi_allocmap_free(iscsilun);
qemu_mutex_destroy(&iscsilun->mutex);
memset(iscsilun, 0, sizeof(IscsiLun));
}
@@ -2031,15 +2160,15 @@ static BlockDriver bdrv_iscsi = {
.format_name = "iscsi",
.protocol_name = "iscsi",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -2066,15 +2195,15 @@ static BlockDriver bdrv_iser = {
.format_name = "iser",
.protocol_name = "iser",
.instance_size = sizeof(IscsiLun),
.bdrv_needs_filename = true,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.instance_size = sizeof(IscsiLun),
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
.bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,

View File

@@ -54,10 +54,10 @@ struct LinuxAioState {
io_context_t ctx;
EventNotifier e;
/* io queue for submit at batch */
/* io queue for submit at batch. Protected by AioContext lock. */
LaioQueue io_q;
/* I/O completion processing */
/* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
* that!
*/
if (!qemu_coroutine_entered(laiocb->co)) {
qemu_coroutine_enter(laiocb->co);
aio_co_wake(laiocb->co);
}
} else {
laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,9 +234,12 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
qemu_laio_process_completions(s);
aio_context_acquire(s->aio_context);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
aio_context_release(s->aio_context);
}
static void qemu_laio_completion_bh(void *opaque)
@@ -455,6 +458,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
qemu_bh_delete(s->completion_bh);
s->aio_context = NULL;
}
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)

View File

@@ -69,6 +69,7 @@ typedef struct MirrorBlockJob {
bool waiting_for_io;
int target_cluster_sectors;
int max_iov;
bool initial_zeroing_ongoing;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -117,9 +118,10 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
if (!s->initial_zeroing_ongoing) {
s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
}
}
qemu_iovec_destroy(&op->qiov);
g_free(op);
@@ -132,6 +134,8 @@ static void mirror_write_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -142,12 +146,15 @@ static void mirror_write_complete(void *opaque, int ret)
}
}
mirror_iteration_done(op, ret);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static void mirror_read_complete(void *opaque, int ret)
{
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
if (ret < 0) {
BlockErrorAction action;
@@ -158,10 +165,11 @@ static void mirror_read_complete(void *opaque, int ret)
}
mirror_iteration_done(op, ret);
return;
} else {
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
}
blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
0, mirror_write_complete, op);
aio_context_release(blk_get_aio_context(s->common.blk));
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -378,7 +386,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * sectors_per_chunk);
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
int64_t ret;
int io_sectors, io_sectors_acct;
BlockDriverState *file;
enum MirrorMethod {
@@ -566,6 +574,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
s->initial_zeroing_ongoing = true;
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
@@ -573,6 +582,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
mirror_throttle(s);
if (block_job_is_cancelled(&s->common)) {
s->initial_zeroing_ongoing = false;
return 0;
}
@@ -587,6 +597,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
}
mirror_wait_for_all_io(s);
s->initial_zeroing_ongoing = false;
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -651,7 +662,28 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->bdev_length < 0) {
ret = s->bdev_length;
goto immediate_exit;
} else if (s->bdev_length == 0) {
}
/* Active commit must resize the base image if its size differs from the
* active layer. */
if (s->base == blk_bs(s->target)) {
int64_t base_length;
base_length = blk_getlength(s->target);
if (base_length < 0) {
ret = base_length;
goto immediate_exit;
}
if (s->bdev_length > base_length) {
ret = blk_truncate(s->target, s->bdev_length);
if (ret < 0) {
goto immediate_exit;
}
}
}
if (s->bdev_length == 0) {
/* Report BLOCK_JOB_READY and wait for complete. */
block_job_event_ready(&s->common);
s->synced = true;
@@ -1052,9 +1084,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
int64_t length, base_length;
int orig_base_flags;
int ret;
Error *local_err = NULL;
orig_base_flags = bdrv_get_flags(base);
@@ -1063,31 +1093,6 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
return;
}
length = bdrv_getlength(bs);
if (length < 0) {
error_setg_errno(errp, -length,
"Unable to determine length of %s", bs->filename);
goto error_restore_flags;
}
base_length = bdrv_getlength(base);
if (base_length < 0) {
error_setg_errno(errp, -base_length,
"Unable to determine length of %s", base->filename);
goto error_restore_flags;
}
if (length > base_length) {
ret = bdrv_truncate(base, length);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Top image %s is larger than base image %s, and "
"resize of base image failed",
bs->filename, base->filename);
goto error_restore_flags;
}
}
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,

View File

@@ -33,8 +33,9 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
@@ -42,6 +43,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
BDRV_POLL_WHILE(bs, s->read_reply_co);
}
static void nbd_teardown_connection(BlockDriverState *bs)
@@ -56,7 +58,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
qio_channel_shutdown(client->ioc,
QIO_CHANNEL_SHUTDOWN_BOTH,
NULL);
nbd_recv_coroutines_enter_all(client);
nbd_recv_coroutines_enter_all(bs);
nbd_client_detach_aio_context(bs);
object_unref(OBJECT(client->sioc));
@@ -65,54 +67,43 @@ static void nbd_teardown_connection(BlockDriverState *bs)
client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
static coroutine_fn void nbd_read_reply_entry(void *opaque)
{
BlockDriverState *bs = opaque;
NBDClientSession *s = nbd_get_client_session(bs);
NBDClientSession *s = opaque;
uint64_t i;
int ret;
if (!s->ioc) { /* Already closed */
return;
}
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
for (;;) {
assert(s->reply.handle == 0);
ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
break;
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes.
*/
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
break;
}
/* We're woken up by the recv_coroutine itself. Note that there
* is no race between yielding and reentering read_reply_co. This
* is because:
*
* - if recv_coroutine[i] runs on the same AioContext, it is only
* entered after we yield
*
* - if recv_coroutine[i] runs on a different AioContext, reentering
* read_reply_co happens through a bottom half, which can only
* run after we yield.
*/
aio_co_wake(s->recv_coroutine[i]);
qemu_coroutine_yield();
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
fail:
nbd_teardown_connection(bs);
}
static void nbd_restart_write(void *opaque)
{
BlockDriverState *bs = opaque;
qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
s->read_reply_co = NULL;
}
static int nbd_co_send_request(BlockDriverState *bs,
@@ -120,7 +111,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
QEMUIOVector *qiov)
{
NBDClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
@@ -141,11 +131,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
return -EPIPE;
}
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, nbd_restart_write, NULL, bs);
if (qiov) {
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
@@ -160,9 +145,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
} else {
rc = nbd_send_request(s->ioc, request);
}
aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, NULL, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
@@ -174,8 +156,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
/* Wait until we're woken up by nbd_read_reply_entry. */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle ||
@@ -201,7 +182,7 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema);
qemu_co_queue_wait(&s->free_sema, NULL);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
@@ -209,13 +190,19 @@ static void nbd_coroutine_start(NBDClientSession *s,
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
static void nbd_coroutine_end(NBDClientSession *s,
static void nbd_coroutine_end(BlockDriverState *bs,
NBDRequest *request)
{
NBDClientSession *s = nbd_get_client_session(bs);
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_queue_next(&s->free_sema);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
/* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
}
@@ -241,7 +228,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -271,7 +258,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -306,7 +293,7 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -331,7 +318,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
@@ -357,23 +344,23 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
nbd_coroutine_end(bs, &request);
return -reply.error;
}
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
nbd_get_client_session(bs)->sioc->fd,
false, NULL, NULL, NULL, NULL);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
false, nbd_reply_ready, NULL, NULL, bs);
NBDClientSession *client = nbd_get_client_session(bs);
qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
aio_co_schedule(new_context, client->read_reply_co);
}
void nbd_client_close(BlockDriverState *bs)
@@ -434,7 +421,7 @@ int nbd_client_init(BlockDriverState *bs,
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");

View File

@@ -25,7 +25,7 @@ typedef struct NBDClientSession {
CoMutex send_mutex;
CoQueue free_sema;
Coroutine *send_coroutine;
Coroutine *read_reply_co;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

View File

@@ -537,8 +537,6 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
visit_complete(ov, &saddr_qdict);
visit_free(ov);
assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
qdict_put_obj(opts, "server", saddr_qdict);
if (s->export) {

View File

@@ -54,6 +54,7 @@ typedef struct NFSClient {
int events;
bool has_zero_init;
AioContext *aio_context;
QemuMutex mutex;
blkcnt_t st_blocks;
bool cache_used;
NFSServer *server;
@@ -191,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options,
static void nfs_process_read(void *arg);
static void nfs_process_write(void *arg);
/* Called with QemuMutex held. */
static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
@@ -208,15 +210,21 @@ static void nfs_set_events(NFSClient *client)
static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
qemu_mutex_lock(&client->mutex);
nfs_service(client->context, POLLIN);
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
qemu_mutex_lock(&client->mutex);
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
}
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -231,10 +239,12 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
task->complete = 1;
qemu_coroutine_enter(task->co);
aio_co_wake(task->co);
}
/* Called (via nfs_service) with QemuMutex held. */
static void
nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
@@ -256,9 +266,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
nfs_co_generic_bh_cb, task);
}
static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *iov,
int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
@@ -266,14 +276,15 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
nfs_co_init_task(bs, &task);
task.iov = iov;
qemu_mutex_lock(&client->mutex);
if (nfs_pread_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
nfs_co_generic_cb, &task) != 0) {
offset, bytes, nfs_co_generic_cb, &task) != 0) {
qemu_mutex_unlock(&client->mutex);
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -290,39 +301,50 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
return 0;
}
static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *iov,
int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
char *buf = NULL;
bool my_buffer = false;
nfs_co_init_task(bs, &task);
buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
if (nb_sectors && buf == NULL) {
return -ENOMEM;
if (iov->niov != 1) {
buf = g_try_malloc(bytes);
if (bytes && buf == NULL) {
return -ENOMEM;
}
qemu_iovec_to_buf(iov, 0, buf, bytes);
my_buffer = true;
} else {
buf = iov->iov[0].iov_base;
}
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
qemu_mutex_lock(&client->mutex);
if (nfs_pwrite_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
buf, nfs_co_generic_cb, &task) != 0) {
g_free(buf);
offset, bytes, buf,
nfs_co_generic_cb, &task) != 0) {
qemu_mutex_unlock(&client->mutex);
if (my_buffer) {
g_free(buf);
}
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
g_free(buf);
if (my_buffer) {
g_free(buf);
}
if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
if (task.ret != bytes) {
return task.ret < 0 ? task.ret : -EIO;
}
@@ -336,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
nfs_co_init_task(bs, &task);
qemu_mutex_lock(&client->mutex);
if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
qemu_mutex_unlock(&client->mutex);
return -ENOMEM;
}
nfs_set_events(client);
qemu_mutex_unlock(&client->mutex);
while (!task.complete) {
qemu_coroutine_yield();
}
@@ -427,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
nfs_client_close(client);
qemu_mutex_destroy(&client->mutex);
}
static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -634,6 +660,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
if (ret < 0) {
return ret;
}
qemu_mutex_init(&client->mutex);
bs->total_sectors = ret;
ret = 0;
return ret;
@@ -689,6 +716,7 @@ static int nfs_has_zero_init(BlockDriverState *bs)
return client->has_zero_init;
}
/* Called (via nfs_service) with QemuMutex held. */
static void
nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
@@ -798,8 +826,6 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
ov = qobject_output_visitor_new(&server_qdict);
visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
visit_complete(ov, &server_qdict);
assert(qobject_type(server_qdict) == QTYPE_QDICT);
qdict_put_obj(opts, "server", server_qdict);
qdict_put(opts, "path", qstring_from_str(client->path));
@@ -856,8 +882,8 @@ static BlockDriver bdrv_nfs = {
.bdrv_create = nfs_file_create,
.bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
.bdrv_co_preadv = nfs_co_preadv,
.bdrv_co_pwritev = nfs_co_pwritev,
.bdrv_co_flush_to_disk = nfs_co_flush,
.bdrv_detach_aio_context = nfs_detach_aio_context,

View File

@@ -215,7 +215,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
s->data_end << BDRV_SECTOR_BITS,
space << BDRV_SECTOR_BITS, 0);
} else {
ret = bdrv_truncate(bs->file->bs,
ret = bdrv_truncate(bs->file,
(s->data_end + space) << BDRV_SECTOR_BITS);
}
if (ret < 0) {
@@ -449,7 +449,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
size - res->image_end_offset);
res->leaks += count;
if (fix & BDRV_FIX_LEAKS) {
ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
ret = bdrv_truncate(bs->file, res->image_end_offset);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -581,6 +581,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
@@ -681,7 +687,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
goto fail_options;
}
if (!bdrv_has_zero_init(bs->file->bs) ||
bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
@@ -724,7 +730,7 @@ static void parallels_close(BlockDriverState *bs)
}
if (bs->open_flags & BDRV_O_RDWR) {
bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
}
g_free(s->bat_dirty_bmap);

View File

@@ -682,7 +682,6 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
visit_complete(v, &obj);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
qobject_decref(obj);

View File

@@ -106,6 +106,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
QCowHeader header;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
@@ -467,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
@@ -909,7 +915,7 @@ static int qcow_make_empty(BlockDriverState *bs)
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;

View File

@@ -932,9 +932,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
if (bytes == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
qemu_co_mutex_unlock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests);
qemu_co_mutex_lock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
return -EAGAIN;
}
}

View File

@@ -1734,7 +1734,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
goto resize_fail;
}
ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
ret = bdrv_truncate(bs->file, offset + s->cluster_size);
if (ret < 0) {
goto resize_fail;
}

View File

@@ -814,8 +814,8 @@ static int qcow2_update_options(BlockDriverState *bs, QDict *options,
return ret;
}
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQcow2State *s = bs->opaque;
unsigned int len, i;
@@ -1205,6 +1205,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
return qcow2_do_open(bs, options, flags, errp);
}
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
@@ -1785,7 +1797,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
options = qdict_clone_shallow(bs->options);
flags &= ~BDRV_O_INACTIVE;
ret = qcow2_open(bs, options, flags, &local_err);
ret = qcow2_do_open(bs, options, flags, &local_err);
QDECREF(options);
if (local_err) {
error_propagate(errp, local_err);
@@ -2570,7 +2582,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file->bs);
return bdrv_truncate(bs->file->bs, cluster_offset);
return bdrv_truncate(bs->file, cluster_offset);
}
buf = qemu_blockalign(bs, s->cluster_size);
@@ -2784,7 +2796,7 @@ static int make_completely_empty(BlockDriverState *bs)
goto fail;
}
ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -3250,7 +3262,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
ret = bdrv_truncate(bs, new_size);
BlockBackend *blk = blk_new();
blk_insert_bs(blk, bs);
ret = blk_truncate(blk, new_size);
blk_unref(blk);
if (ret < 0) {
return ret;
}

View File

@@ -83,6 +83,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
unsigned int index;
unsigned int n;
qed_acquire(s);
if (ret) {
goto out;
}
@@ -109,6 +110,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
out:
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
qed_release(s);
g_free(find_cluster_cb);
}

View File

@@ -31,6 +31,7 @@ static void qed_read_table_cb(void *opaque, int ret)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
BDRVQEDState *s = read_table_cb->s;
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
@@ -40,13 +41,15 @@ static void qed_read_table_cb(void *opaque, int ret)
}
/* Byteswap offsets */
qed_acquire(s);
for (i = 0; i < noffsets; i++) {
table->offsets[i] = le64_to_cpu(table->offsets[i]);
}
qed_release(s);
out:
/* Completion */
trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
trace_qed_read_table_cb(s, read_table_cb->table, ret);
gencb_complete(&read_table_cb->gencb, ret);
}
@@ -84,8 +87,9 @@ typedef struct {
static void qed_write_table_cb(void *opaque, int ret)
{
QEDWriteTableCB *write_table_cb = opaque;
BDRVQEDState *s = write_table_cb->s;
trace_qed_write_table_cb(write_table_cb->s,
trace_qed_write_table_cb(s,
write_table_cb->orig_table,
write_table_cb->flush,
ret);
@@ -97,8 +101,10 @@ static void qed_write_table_cb(void *opaque, int ret)
if (write_table_cb->flush) {
/* We still need to flush first */
write_table_cb->flush = false;
qed_acquire(s);
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
write_table_cb);
qed_release(s);
return;
}
@@ -213,6 +219,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
CachedL2Table *l2_table = request->l2_table;
uint64_t l2_offset = read_l2_table_cb->l2_offset;
qed_acquire(s);
if (ret) {
/* can't trust loaded L2 table anymore */
qed_unref_l2_cache_entry(l2_table);
@@ -228,6 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(request->l2_table != NULL);
}
qed_release(s);
gencb_complete(&read_l2_table_cb->gencb, ret);
}

View File

@@ -273,7 +273,19 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
return l2_table;
}
static void qed_aio_next_io(void *opaque, int ret);
static void qed_aio_next_io(QEDAIOCB *acb, int ret);
static void qed_aio_start_io(QEDAIOCB *acb)
{
qed_aio_next_io(acb, 0);
}
static void qed_aio_next_io_cb(void *opaque, int ret)
{
QEDAIOCB *acb = opaque;
qed_aio_next_io(acb, ret);
}
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
@@ -292,7 +304,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
}
}
@@ -333,10 +345,22 @@ static void qed_need_check_timer_cb(void *opaque)
trace_qed_need_check_timer_cb(s);
qed_acquire(s);
qed_plug_allocating_write_reqs(s);
/* Ensure writes are on disk before clearing flag */
bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
qed_release(s);
}
void qed_acquire(BDRVQEDState *s)
{
aio_context_acquire(bdrv_get_aio_context(s->bs));
}
void qed_release(BDRVQEDState *s)
{
aio_context_release(bdrv_get_aio_context(s->bs));
}
static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -391,8 +415,8 @@ static void bdrv_qed_drain(BlockDriverState *bs)
}
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -526,6 +550,18 @@ out:
return ret;
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
return bdrv_qed_do_open(bs, options, flags, errp);
}
static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
@@ -721,7 +757,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
}
if (cb->co) {
qemu_coroutine_enter(cb->co);
aio_co_wake(cb->co);
}
}
@@ -918,6 +954,7 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
static void qed_aio_complete_bh(void *opaque)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
BlockCompletionFunc *cb = acb->common.cb;
void *user_opaque = acb->common.opaque;
int ret = acb->bh_ret;
@@ -925,7 +962,9 @@ static void qed_aio_complete_bh(void *opaque)
qemu_aio_unref(acb);
/* Invoke callback */
qed_acquire(s);
cb(user_opaque, ret);
qed_release(s);
}
static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -959,7 +998,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
} else if (s->header.features & QED_F_NEED_CHECK) {
qed_start_need_check_timer(s);
}
@@ -984,7 +1023,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
assert(acb->request.l2_table != NULL);
qed_aio_next_io(opaque, ret);
qed_aio_next_io(acb, ret);
}
/**
@@ -1032,11 +1071,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
if (need_alloc) {
/* Write out the whole new L2 table */
qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
qed_aio_write_l1_update, acb);
qed_aio_write_l1_update, acb);
} else {
/* Write out only the updated part of the L2 table */
qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
qed_aio_next_io, acb);
qed_aio_next_io_cb, acb);
}
return;
@@ -1088,7 +1127,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
next_fn = qed_aio_next_io;
next_fn = qed_aio_next_io_cb;
} else {
if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
@@ -1201,7 +1240,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
if (acb->flags & QED_AIOCB_ZERO) {
/* Skip ahead if the clusters are already zero */
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return;
}
@@ -1321,18 +1360,18 @@ static void qed_aio_read_data(void *opaque, int ret,
/* Handle zero cluster and backing file reads */
if (ret == QED_CLUSTER_ZERO) {
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return;
} else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
&acb->backing_qiov, qed_aio_next_io, acb);
&acb->backing_qiov, qed_aio_next_io_cb, acb);
return;
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
qed_aio_next_io_cb, acb);
return;
err:
@@ -1342,9 +1381,8 @@ err:
/**
* Begin next I/O or complete the request
*/
static void qed_aio_next_io(void *opaque, int ret)
static void qed_aio_next_io(QEDAIOCB *acb, int ret)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
qed_aio_write_data : qed_aio_read_data;
@@ -1400,7 +1438,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
/* Start request */
qed_aio_next_io(acb, 0);
qed_aio_start_io(acb);
return &acb->common;
}
@@ -1436,7 +1474,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
cb->done = true;
cb->ret = ret;
if (cb->co) {
qemu_coroutine_enter(cb->co);
aio_co_wake(cb->co);
}
}
@@ -1603,7 +1641,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "Could not reopen qed layer: ");

View File

@@ -198,6 +198,9 @@ enum {
*/
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
void qed_acquire(BDRVQEDState *s);
void qed_release(BDRVQEDState *s);
/**
* Generic callback for chaining async callbacks
*/

View File

@@ -341,7 +341,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
s->size = offset;
offset += s->offset;
return bdrv_truncate(bs->file->bs, offset);
return bdrv_truncate(bs->file, offset);
}
static int raw_media_changed(BlockDriverState *bs)
@@ -384,6 +384,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
BDRVRawState *s = bs->opaque;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;

View File

@@ -62,6 +62,13 @@
#define RBD_MAX_SNAP_NAME_SIZE 128
#define RBD_MAX_SNAPS 100
/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
#ifdef LIBRBD_SUPPORTS_IOVEC
#define LIBRBD_USE_IOVEC 1
#else
#define LIBRBD_USE_IOVEC 0
#endif
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
@@ -310,6 +317,17 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
return ret;
}
static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
{
if (LIBRBD_USE_IOVEC) {
RBDAIOCB *acb = rcb->acb;
iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
acb->qiov->size - offs);
} else {
memset(rcb->buf + offs, 0, rcb->size - offs);
}
}
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
@@ -426,11 +444,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
} else {
if (r < 0) {
memset(rcb->buf, 0, rcb->size);
qemu_rbd_memset(rcb, 0);
acb->ret = r;
acb->error = 1;
} else if (r < rcb->size) {
memset(rcb->buf + r, 0, rcb->size - r);
qemu_rbd_memset(rcb, r);
if (!acb->error) {
acb->ret = rcb->size;
}
@@ -441,10 +459,13 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
g_free(rcb);
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
if (!LIBRBD_USE_IOVEC) {
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_aio_unref(acb);
@@ -655,7 +676,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
char *buf;
int r;
BDRVRBDState *s = bs->opaque;
@@ -664,27 +684,29 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
rcb = g_new(RADOSCB, 1);
if (!LIBRBD_USE_IOVEC) {
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
}
}
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
rcb->buf = acb->bounce;
}
acb->ret = 0;
acb->error = 0;
acb->s = s;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
}
buf = acb->bounce;
rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
rcb->buf = buf;
rcb->s = acb->s;
rcb->size = size;
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -694,10 +716,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
switch (cmd) {
case RBD_AIO_WRITE:
r = rbd_aio_write(s->image, off, size, buf, c);
#ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
#else
r = rbd_aio_write(s->image, off, size, rcb->buf, c);
#endif
break;
case RBD_AIO_READ:
r = rbd_aio_read(s->image, off, size, buf, c);
#ifdef LIBRBD_SUPPORTS_IOVEC
r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
#else
r = rbd_aio_read(s->image, off, size, rcb->buf, c);
#endif
break;
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -712,14 +742,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
if (r < 0) {
goto failed_completion;
}
return &acb->common;
failed_completion:
rbd_aio_release(c);
failed:
g_free(rcb);
qemu_vfree(acb->bounce);
if (!LIBRBD_USE_IOVEC) {
qemu_vfree(acb->bounce);
}
qemu_aio_unref(acb);
return NULL;
}

View File

@@ -86,6 +86,12 @@ static int replication_open(BlockDriverState *bs, QDict *options,
const char *mode;
const char *top_id;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
ret = -EINVAL;
opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);

View File

@@ -486,7 +486,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
retry:
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(acb, cb)) {
qemu_co_queue_wait(&s->overlapping_queue);
qemu_co_queue_wait(&s->overlapping_queue, NULL);
goto retry;
}
}
@@ -575,13 +575,6 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
static void restart_co_req(void *opaque)
{
Coroutine *co = opaque;
qemu_coroutine_enter(co);
}
typedef struct SheepdogReqCo {
int sockfd;
BlockDriverState *bs;
@@ -592,12 +585,19 @@ typedef struct SheepdogReqCo {
unsigned int *rlen;
int ret;
bool finished;
Coroutine *co;
} SheepdogReqCo;
static void restart_co_req(void *opaque)
{
SheepdogReqCo *srco = opaque;
aio_co_wake(srco->co);
}
static coroutine_fn void do_co_req(void *opaque)
{
int ret;
Coroutine *co;
SheepdogReqCo *srco = opaque;
int sockfd = srco->sockfd;
SheepdogReq *hdr = srco->hdr;
@@ -605,9 +605,9 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *wlen = srco->wlen;
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
srco->co = qemu_coroutine_self();
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, restart_co_req, NULL, co);
NULL, restart_co_req, NULL, srco);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
@@ -615,7 +615,7 @@ static coroutine_fn void do_co_req(void *opaque)
}
aio_set_fd_handler(srco->aio_context, sockfd, false,
restart_co_req, NULL, NULL, co);
restart_co_req, NULL, NULL, srco);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -643,6 +643,7 @@ out:
aio_set_fd_handler(srco->aio_context, sockfd, false,
NULL, NULL, NULL, NULL);
srco->co = NULL;
srco->ret = ret;
srco->finished = true;
if (srco->bs) {
@@ -866,7 +867,7 @@ static void coroutine_fn aio_read_response(void *opaque)
* We've finished all requests which belong to the AIOCB, so
* we can switch back to sd_co_readv/writev now.
*/
qemu_coroutine_enter(acb->coroutine);
aio_co_wake(acb->coroutine);
}
return;
@@ -883,14 +884,14 @@ static void co_read_response(void *opaque)
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
qemu_coroutine_enter(s->co_recv);
aio_co_wake(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
qemu_coroutine_enter(s->co_send);
aio_co_wake(s->co_send);
}
/*

View File

@@ -889,10 +889,14 @@ static void restart_coroutine(void *opaque)
DPRINTF("co=%p", co);
qemu_coroutine_enter(co);
aio_co_wake(co);
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -912,25 +916,10 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, rd_handler, wr_handler, NULL, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
false, NULL, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
set_fd_handler(s, bs);
qemu_coroutine_yield();
clear_fd_handler(s, bs);
DPRINTF("s->sock=%d - back", s->sock);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
NULL, NULL, NULL, NULL);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position

View File

@@ -326,7 +326,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
@@ -416,7 +416,9 @@ static void timer_cb(BlockBackend *blk, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
aio_context_acquire(blk_get_aio_context(blk));
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
aio_context_release(blk_get_aio_context(blk));
/* If the request queue was empty then we have to take care of
* scheduling the next one */

View File

@@ -363,6 +363,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
logout("\n");
ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);

View File

@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
bdrv_truncate(bs->file->bs, new_file_size);
bdrv_truncate(bs->file, new_file_size);
}
}
qemu_vfree(desc_entries);

View File

@@ -898,6 +898,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
uint64_t signature;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
s->bat = NULL;
s->first_visible_write = true;
@@ -1165,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
return bdrv_truncate(bs->file, *new_offset + s->block_size);
}
/*

View File

@@ -943,6 +943,12 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t magic;
Error *local_err = NULL;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
buf = vmdk_read_desc(bs->file, 0, errp);
if (!buf) {
return -EINVAL;

View File

@@ -220,6 +220,12 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
false, errp);
if (!bs->file) {
return -EINVAL;
}
opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {

View File

@@ -2968,6 +2968,7 @@ static void write_target_close(BlockDriverState *bs) {
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
.instance_size = sizeof(void*),
.bdrv_co_pwritev = write_target_commit,
.bdrv_close = write_target_close,
};
@@ -3036,14 +3037,13 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
unlink(s->qcow_filename);
#endif
backing = bdrv_new();
backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,
&error_abort);
*(void**) backing->opaque = s;
bdrv_set_backing_hd(s->bs, backing);
bdrv_unref(backing);
s->bs->backing->bs->drv = &vvfat_write_target;
s->bs->backing->bs->opaque = g_new(void *, 1);
*(void**)s->bs->backing->bs->opaque = s;
return 0;
err:

View File

@@ -41,7 +41,7 @@ struct QEMUWin32AIOState {
HANDLE hIOCP;
EventNotifier e;
int count;
bool is_aio_context_attached;
AioContext *aio_ctx;
};
typedef struct QEMUWin32AIOCB {
@@ -87,7 +87,6 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
qemu_vfree(waiocb->buf);
}
waiocb->common.cb(waiocb->common.opaque, ret);
qemu_aio_unref(waiocb);
}
@@ -176,13 +175,13 @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
aio->is_aio_context_attached = false;
aio->aio_ctx = NULL;
}
void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
aio->is_aio_context_attached = true;
aio->aio_ctx = new_context;
aio_set_event_notifier(new_context, &aio->e, false,
win32_aio_completion_cb, NULL);
}
@@ -212,7 +211,7 @@ out_free_state:
void win32_aio_cleanup(QEMUWin32AIOState *aio)
{
assert(!aio->is_aio_context_attached);
assert(!aio->aio_ctx);
CloseHandle(aio->hIOCP);
event_notifier_cleanup(&aio->e);
g_free(aio);

View File

@@ -52,6 +52,7 @@
#include "sysemu/arch_init.h"
#include "qemu/cutils.h"
#include "qemu/help_option.h"
#include "qemu/throttle-options.h"
static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
@@ -227,27 +228,30 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
return NULL;
}
bool drive_check_orphaned(void)
void drive_check_orphaned(void)
{
BlockBackend *blk;
DriveInfo *dinfo;
bool rs = false;
Location loc;
bool orphans = false;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/* If dinfo->bdrv->dev is NULL, it has no device attached. */
/* Unless this is a default drive, this may be an oversight. */
if (!blk_get_attached_dev(blk) && !dinfo->is_default &&
dinfo->type != IF_NONE) {
fprintf(stderr, "Warning: Orphaned drive without device: "
"id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
rs = true;
loc_push_none(&loc);
qemu_opts_loc_restore(dinfo->opts);
error_report("machine type does not support"
" if=%s,bus=%d,unit=%d",
if_name[dinfo->type], dinfo->bus, dinfo->unit);
loc_pop(&loc);
orphans = true;
}
}
return rs;
if (orphans) {
exit(1);
}
}
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
@@ -2855,6 +2859,7 @@ void qmp_block_resize(bool has_device, const char *device,
int64_t size, Error **errp)
{
Error *local_err = NULL;
BlockBackend *blk = NULL;
BlockDriverState *bs;
AioContext *aio_context;
int ret;
@@ -2885,10 +2890,13 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
blk = blk_new();
blk_insert_bs(blk, bs);
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
ret = bdrv_truncate(bs, size);
ret = blk_truncate(blk, size);
switch (ret) {
case 0:
break;
@@ -2910,6 +2918,7 @@ void qmp_block_resize(bool has_device, const char *device,
}
out:
blk_unref(blk);
aio_context_release(aio_context);
}
@@ -3999,83 +4008,11 @@ QemuOptsList qemu_common_drive_opts = {
.name = BDRV_OPT_READ_ONLY,
.type = QEMU_OPT_BOOL,
.help = "open drive file as read-only",
},{
.name = "throttling.iops-total",
.type = QEMU_OPT_NUMBER,
.help = "limit total I/O operations per second",
},{
.name = "throttling.iops-read",
.type = QEMU_OPT_NUMBER,
.help = "limit read operations per second",
},{
.name = "throttling.iops-write",
.type = QEMU_OPT_NUMBER,
.help = "limit write operations per second",
},{
.name = "throttling.bps-total",
.type = QEMU_OPT_NUMBER,
.help = "limit total bytes per second",
},{
.name = "throttling.bps-read",
.type = QEMU_OPT_NUMBER,
.help = "limit read bytes per second",
},{
.name = "throttling.bps-write",
.type = QEMU_OPT_NUMBER,
.help = "limit write bytes per second",
},{
.name = "throttling.iops-total-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations burst",
},{
.name = "throttling.iops-read-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations read burst",
},{
.name = "throttling.iops-write-max",
.type = QEMU_OPT_NUMBER,
.help = "I/O operations write burst",
},{
.name = "throttling.bps-total-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes burst",
},{
.name = "throttling.bps-read-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes read burst",
},{
.name = "throttling.bps-write-max",
.type = QEMU_OPT_NUMBER,
.help = "total bytes write burst",
},{
.name = "throttling.iops-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-total-max burst period, in seconds",
},{
.name = "throttling.iops-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-read-max burst period, in seconds",
},{
.name = "throttling.iops-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the iops-write-max burst period, in seconds",
},{
.name = "throttling.bps-total-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-total-max burst period, in seconds",
},{
.name = "throttling.bps-read-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-read-max burst period, in seconds",
},{
.name = "throttling.bps-write-max-length",
.type = QEMU_OPT_NUMBER,
.help = "length of the bps-write-max burst period, in seconds",
},{
.name = "throttling.iops-size",
.type = QEMU_OPT_NUMBER,
.help = "when limiting by iops max size of an I/O in bytes",
},{
},
THROTTLE_OPTS,
{
.name = "throttling.group",
.type = QEMU_OPT_STRING,
.help = "name of the block throttling group",

15
configure vendored
View File

@@ -3378,7 +3378,7 @@ fi
fdt_required=no
for target in $target_list; do
case $target in
aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
fdt_required=yes
;;
esac
@@ -3396,11 +3396,11 @@ fi
if test "$fdt" != "no" ; then
fdt_libs="-lfdt"
# explicitly check for libfdt_env.h as it is missing in some stable installs
# and test for required functions to make sure we are on a version >= 1.4.0
# and test for required functions to make sure we are on a version >= 1.4.2
cat > $TMPC << EOF
#include <libfdt.h>
#include <libfdt_env.h>
int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
int main(void) { fdt_first_subnode(0, 0); return 0; }
EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
@@ -3418,7 +3418,7 @@ EOF
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"
@@ -5879,6 +5879,7 @@ mkdir -p $target_dir
echo "# Automatically generated by configure - do not modify" > $config_target_mak
bflt="no"
mttcg="no"
interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g")
gdb_xml_files=""
@@ -5893,15 +5894,18 @@ case "$target_name" in
TARGET_BASE_ARCH=i386
;;
alpha)
mttcg="yes"
;;
arm|armeb)
TARGET_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
aarch64)
TARGET_BASE_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
cris)
@@ -6066,6 +6070,9 @@ if test "$target_bigendian" = "yes" ; then
fi
if test "$target_softmmu" = "yes" ; then
echo "CONFIG_SOFTMMU=y" >> $config_target_mak
if test "$mttcg" = "yes" ; then
echo "TARGET_SUPPORTS_MTTCG=y" >> $config_target_mak
fi
fi
if test "$target_user_only" = "yes" ; then
echo "CONFIG_USER_ONLY=y" >> $config_target_mak

View File

@@ -23,9 +23,6 @@
#include "exec/exec-all.h"
#include "exec/memory-internal.h"
bool exit_request;
CPUState *tcg_current_cpu;
/* exit the current TB, but without causing any exception to be raised */
void cpu_loop_exit_noexc(CPUState *cpu)
{

View File

@@ -29,6 +29,7 @@
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@@ -227,20 +228,43 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
static void cpu_exec_step(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
mmap_lock();
tb_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
tb_unlock();
mmap_unlock();
cc->cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
cc->cpu_exec_exit(cpu);
tb_lock();
tb_phys_invalidate(tb, -1);
tb_free(tb);
tb_unlock();
} else {
/* We may have exited due to another problem here, so we need
* to reset any tb_locks we may have taken but didn't release.
* The mmap_lock is dropped by tb_gen_code if it runs out of
* memory.
*/
#ifndef CONFIG_SOFTMMU
tcg_debug_assert(!have_mmap_lock());
#endif
tb_lock_reset();
}
}
void cpu_exec_step_atomic(CPUState *cpu)
@@ -384,12 +408,13 @@ static inline bool cpu_handle_halt(CPUState *cpu)
if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
&& replay_interrupt()) {
X86CPU *x86_cpu = X86_CPU(cpu);
qemu_mutex_lock_iothread();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
qemu_mutex_unlock_iothread();
}
#endif
if (!cpu_has_work(cpu)) {
current_cpu = NULL;
return true;
}
@@ -439,7 +464,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
qemu_mutex_unlock_iothread();
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
@@ -465,9 +492,11 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int interrupt_request = cpu->interrupt_request;
if (unlikely(interrupt_request)) {
if (unlikely(atomic_read(&cpu->interrupt_request))) {
int interrupt_request;
qemu_mutex_lock_iothread();
interrupt_request = cpu->interrupt_request;
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -475,6 +504,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG;
qemu_mutex_unlock_iothread();
return true;
}
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
@@ -484,6 +514,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
qemu_mutex_unlock_iothread();
return true;
}
#if defined(TARGET_I386)
@@ -494,12 +525,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED;
qemu_mutex_unlock_iothread();
return true;
}
#else
else if (interrupt_request & CPU_INTERRUPT_RESET) {
replay_interrupt();
cpu_reset(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#endif
@@ -522,7 +555,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
the program flow was changed */
*last_tb = NULL;
}
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
qemu_mutex_unlock_iothread();
}
if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
@@ -548,15 +586,13 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
*tb_exit = ret & TB_EXIT_MASK;
switch (*tb_exit) {
case TB_EXIT_REQUESTED:
/* Something asked us to stop executing
* chained TBs; just continue round the main
* loop. Whatever requested the exit will also
* have set something else (eg exit_request or
* interrupt_request) which we will handle
* next time around the loop. But we need to
* ensure the zeroing of tcg_exit_req (see cpu_tb_exec)
* comes before the next read of cpu->exit_request
* or cpu->interrupt_request.
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
* will also have set something else (eg interrupt_request)
* which we will handle next time around the loop. But we
* need to ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request or
* cpu->interrupt_request.
*/
smp_mb();
*last_tb = NULL;
@@ -608,13 +644,8 @@ int cpu_exec(CPUState *cpu)
return EXCP_HALTED;
}
atomic_mb_set(&tcg_current_cpu, cpu);
rcu_read_lock();
if (unlikely(atomic_mb_read(&exit_request))) {
cpu->exit_request = 1;
}
cc->cpu_exec_enter(cpu);
/* Calculate difference between guest clock and host clock.
@@ -640,6 +671,9 @@ int cpu_exec(CPUState *cpu)
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
}
/* if an exception is pending, we execute it here */
@@ -659,10 +693,5 @@ int cpu_exec(CPUState *cpu)
cc->cpu_exec_exit(cpu);
rcu_read_unlock();
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_cpu, NULL);
return ret;
}

345
cpus.c
View File

@@ -25,6 +25,7 @@
/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
@@ -45,6 +46,7 @@
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
@@ -150,6 +152,77 @@ typedef struct TimersState {
} TimersState;
static TimersState timers_state;
bool mttcg_enabled;
/*
* We default to false if we know other options have been enabled
* which are currently incompatible with MTTCG. Otherwise when each
* guest (target) has been updated to support:
* - atomic instructions
* - memory ordering primitives (barriers)
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
*
* Once a guest architecture has been converted to the new primitives
* there are two remaining limitations to check.
*
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* - The host must have a stronger memory order than the guest
*
* It may be possible in future to support strong guests on weak hosts
* but that will require tagging all load/stores in a guest with their
* implicit memory order requirements which would likely slow things
* down a lot.
*/
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
return false;
#endif
}
static bool default_mttcg_enabled(void)
{
QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
const char *rr = qemu_opt_get(icount_opts, "rr");
if (rr || TCG_OVERSIZED_GUEST) {
return false;
} else {
#ifdef TARGET_SUPPORTS_MTTCG
return check_tcg_memory_orders_compatible();
#else
return false;
#endif
}
}
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
const char *t = qemu_opt_get(opts, "thread");
if (t) {
if (strcmp(t, "multi") == 0) {
if (TCG_OVERSIZED_GUEST) {
error_setg(errp, "No MTTCG when guest word size > hosts");
} else {
if (!check_tcg_memory_orders_compatible()) {
error_report("Guest expects a stronger memory ordering "
"than the host provides");
error_printf("This may cause strange/hard to debug errors");
}
mttcg_enabled = true;
}
} else if (strcmp(t, "single") == 0) {
mttcg_enabled = false;
} else {
error_setg(errp, "Invalid 'thread' setting %s", t);
}
} else {
mttcg_enabled = default_mttcg_enabled();
}
}
int64_t cpu_get_icount_raw(void)
{
@@ -694,6 +767,63 @@ void configure_icount(QemuOpts *opts, Error **errp)
NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
/* TCG vCPU kick timer
*
* The kick timer is responsible for moving single threaded vCPU
* emulation on to the next vCPU. If more than one vCPU is running a
* timer event with force a cpu->exit so the next vCPU can get
* scheduled.
*
* The timer is removed if all vCPUs are idle and restarted again once
* idleness is complete.
*/
static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
static inline int64_t qemu_tcg_next_kick(void)
{
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
CPUState *cpu;
do {
cpu = atomic_mb_read(&tcg_current_rr_cpu);
if (cpu) {
cpu_exit(cpu);
}
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
qemu_cpu_kick_rr_cpu();
}
static void start_tcg_kick_timer(void)
{
if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
kick_tcg_thread, NULL);
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
}
}
static void stop_tcg_kick_timer(void)
{
if (tcg_kick_vcpu_timer) {
timer_del(tcg_kick_vcpu_timer);
tcg_kick_vcpu_timer = NULL;
}
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
@@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;
static QemuThread io_thread;
@@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void)
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
@@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
static void qemu_wait_io_event_common(CPUState *cpu)
{
atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
process_queued_cpu_work(cpu);
cpu->thread_kicked = false;
}
static bool qemu_tcg_should_sleep(CPUState *cpu)
{
if (mttcg_enabled) {
return cpu_thread_is_idle(cpu);
} else {
return all_cpu_threads_idle();
}
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
while (all_cpu_threads_idle()) {
while (qemu_tcg_should_sleep(cpu)) {
stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
while (iothread_requesting_mutex) {
qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
start_tcg_kick_timer();
CPU_FOREACH(cpu) {
qemu_wait_io_event_common(cpu);
}
qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
@@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
current_cpu = cpu;
while (1) {
current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
@@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
exit(1);
}
qemu_mutex_lock_iothread();
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
@@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
@@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void)
}
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
/* Single-threaded TCG
*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
* This is done explicitly rather than relying on side-effects
* elsewhere.
*/
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
@@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
CPU_FOREACH(cpu) {
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
/* process any pending work */
atomic_mb_set(&exit_request, 1);
start_tcg_kick_timer();
cpu = first_cpu;
/* process any pending work */
cpu->exit_request = 1;
while (1) {
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
@@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
cpu = first_cpu;
}
for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
atomic_mb_set(&tcg_current_rr_cpu, cpu);
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
@@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
break;
}
} else if (cpu->stop || cpu->stopped) {
} else if (cpu->stop) {
if (cpu->unplug) {
cpu = CPU_NEXT(cpu);
}
break;
}
} /* for cpu.. */
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
/* Pairs with smp_wmb in qemu_cpu_kick. */
atomic_mb_set(&exit_request, 0);
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_rr_cpu, NULL);
if (cpu && cpu->exit_request) {
atomic_mb_set(&cpu->exit_request, 0);
}
handle_icount_deadline();
qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}
@@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused)
}
#endif
/* Multi-threaded TCG
*
* In the multi-threaded case each vCPU has its own thread. The TLS
* variable current_cpu can be used deep in the code to find the
* current CPUState for a given thread.
*/
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->can_do_io = 1;
current_cpu = cpu;
qemu_cond_signal(&qemu_cpu_cond);
/* process any pending work */
cpu->exit_request = 1;
while (1) {
if (cpu_can_run(cpu)) {
int r;
r = tcg_cpu_exec(cpu);
switch (r) {
case EXCP_DEBUG:
cpu_handle_guest_debug(cpu);
break;
case EXCP_HALTED:
/* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
default:
/* Ignore everything else? */
break;
}
}
handle_icount_deadline();
atomic_mb_set(&cpu->exit_request, 0);
qemu_tcg_wait_io_event(cpu);
}
return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
@@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
#endif
}
static void qemu_cpu_kick_no_halt(void)
{
CPUState *cpu;
/* Ensure whatever caused the exit has reached the CPU threads before
* writing exit_request.
*/
atomic_mb_set(&exit_request, 1);
cpu = atomic_mb_read(&tcg_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
qemu_cpu_kick_no_halt();
cpu_exit(cpu);
/* NOP unless doing single-thread RR */
qemu_cpu_kick_rr_cpu();
} else {
if (hax_enabled()) {
/*
@@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void)
void qemu_mutex_lock_iothread(void)
{
atomic_inc(&iothread_requesting_mutex);
/* In the simple case there is no need to bump the VCPU thread out of
* TCG code execution.
*/
if (!tcg_enabled() || qemu_in_vcpu_thread() ||
!first_cpu || !first_cpu->created) {
qemu_mutex_lock(&qemu_global_mutex);
atomic_dec(&iothread_requesting_mutex);
} else {
if (qemu_mutex_trylock(&qemu_global_mutex)) {
qemu_cpu_kick_no_halt();
qemu_mutex_lock(&qemu_global_mutex);
}
atomic_dec(&iothread_requesting_mutex);
qemu_cond_broadcast(&qemu_io_proceeded_cond);
}
g_assert(!qemu_mutex_iothread_locked());
qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
@@ -1392,13 +1588,6 @@ void pause_all_vcpus(void)
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
if (!kvm_enabled()) {
CPU_FOREACH(cpu) {
cpu->stop = false;
cpu->stopped = true;
}
return;
}
}
while (!all_vcpus_paused()) {
@@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu)
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
static QemuCond *tcg_halt_cond;
static QemuThread *tcg_cpu_thread;
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
tcg_halt_cond = cpu->halt_cond;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
if (qemu_tcg_mttcg_enabled()) {
/* create a thread per vCPU with TCG (MTTCG) */
parallel_cpus = true;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
} else {
/* share a single thread for all cpus with TCG */
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
qemu_thread_create(cpu->thread, thread_name,
qemu_tcg_rr_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
}
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
tcg_cpu_thread = cpu->thread;
} else {
cpu->thread = tcg_cpu_thread;
cpu->halt_cond = tcg_halt_cond;
/* For non-MTTCG cases we share the thread */
cpu->thread = single_tcg_cpu_thread;
cpu->halt_cond = single_tcg_halt_cond;
}
}

486
cputlb.c
View File

@@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
@@ -57,6 +58,40 @@
} \
} while (0)
#define assert_cpu_is_self(this_cpu) do { \
if (DEBUG_TLB_GATE) { \
g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \
} \
} while (0)
/* run_on_cpu_data.target_ptr should always be big enough for a
* target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
*/
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
/* flush_all_helper: run fn across all cpus
*
* If the wait flag is set then the src cpu's helper will be queued as
* "safe" work and the loop exited creating a synchronisation point
* where all queued work will be finished before execution starts
* again.
*/
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
run_on_cpu_data d)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (cpu != src) {
async_run_on_cpu(cpu, fn, d);
}
}
}
/* statistics */
int tlb_flush_count;
@@ -65,10 +100,22 @@ int tlb_flush_count;
* flushing more entries than required is only an efficiency issue,
* not a correctness issue.
*/
void tlb_flush(CPUState *cpu)
static void tlb_flush_nocheck(CPUState *cpu)
{
CPUArchState *env = cpu->env_ptr;
/* The QOM tests will trigger tlb_flushes without setting up TCG
* so we bug out here in that case.
*/
if (!tcg_enabled()) {
return;
}
assert_cpu_is_self(cpu);
tlb_debug("(count: %d)\n", tlb_flush_count++);
tb_lock();
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@@ -76,39 +123,117 @@ void tlb_flush(CPUState *cpu)
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
tlb_flush_count++;
tb_unlock();
atomic_mb_set(&cpu->pending_tlb_flush, 0);
}
static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
{
tlb_flush_nocheck(cpu);
}
void tlb_flush(CPUState *cpu)
{
if (cpu->created && !qemu_cpu_is_self(cpu)) {
if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
async_run_on_cpu(cpu, tlb_flush_global_async_work,
RUN_ON_CPU_NULL);
}
} else {
tlb_flush_nocheck(cpu);
}
}
void tlb_flush_all_cpus(CPUState *src_cpu)
{
const run_on_cpu_func fn = tlb_flush_global_async_work;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
fn(src_cpu, RUN_ON_CPU_NULL);
}
void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
const run_on_cpu_func fn = tlb_flush_global_async_work;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
}
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
unsigned long mmu_idx_bitmask = data.host_int;
int mmu_idx;
tlb_debug("start\n");
assert_cpu_is_self(cpu);
for (;;) {
int mmu_idx = va_arg(argp, int);
tb_lock();
if (mmu_idx < 0) {
break;
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
tlb_debug("%d\n", mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
tlb_debug("%d\n", mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
tlb_debug("done\n");
tb_unlock();
}
void tlb_flush_by_mmuidx(CPUState *cpu, ...)
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
va_list argp;
va_start(argp, cpu);
v_tlb_flush_by_mmuidx(cpu, argp);
va_end(argp);
tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
if (!qemu_cpu_is_self(cpu)) {
uint16_t pending_flushes = idxmap;
pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
if (pending_flushes) {
tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
atomic_or(&cpu->pending_tlb_flush, pending_flushes);
async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
RUN_ON_CPU_HOST_INT(pending_flushes));
}
} else {
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(idxmap));
}
}
void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}
static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
{
if (addr == (tlb_entry->addr_read &
@@ -121,12 +246,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
}
}
void tlb_flush_page(CPUState *cpu, target_ulong addr)
static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr = (target_ulong) data.target_ptr;
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
/* Check if we need to flush due to large pages. */
@@ -156,15 +284,62 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
tb_flush_jmp_cache(cpu, addr);
}
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_flush_page_async_work,
RUN_ON_CPU_TARGET_PTR(addr));
} else {
tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
}
}
/* As we are going to hijack the bottom bits of the page address for a
* mmuidx bit mask we need to fail to build if we can't do that
*/
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
int i, k;
va_list argp;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
int mmu_idx;
int i;
va_start(argp, addr);
assert_cpu_is_self(cpu);
tlb_debug("addr "TARGET_FMT_lx"\n", addr);
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
page, addr, mmu_idx_bitmap);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
/* check whether there are vltb entries that need to be flushed */
for (i = 0; i < CPU_VTLB_SIZE; i++) {
tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr);
}
}
}
tb_flush_jmp_cache(cpu, addr);
}
static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
@@ -172,33 +347,80 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
v_tlb_flush_by_mmuidx(cpu, argp);
va_end(argp);
return;
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
} else {
tlb_flush_page_by_mmuidx_async_work(cpu, data);
}
}
addr &= TARGET_PAGE_MASK;
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
target_ulong addr_and_mmu_idx;
for (;;) {
int mmu_idx = va_arg(argp, int);
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
if (mmu_idx < 0) {
break;
}
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
tlb_debug("idx %d\n", mmu_idx);
tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
/* check whether there are vltb entries that need to be flushed */
for (k = 0; k < CPU_VTLB_SIZE; k++) {
tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
}
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
} else {
tlb_check_page_and_flush_by_mmuidx_async_work(
cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
va_end(argp);
}
tb_flush_jmp_cache(cpu, addr);
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
target_ulong addr_and_mmu_idx;
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
target_ulong addr_and_mmu_idx;
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
const run_on_cpu_func fn = tlb_flush_page_async_work;
flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
fn(src, RUN_ON_CPU_TARGET_PTR(addr));
}
void tlb_flush_page_all_cpus_synced(CPUState *src,
target_ulong addr)
{
const run_on_cpu_func fn = tlb_flush_page_async_work;
flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
}
/* update the TLBs so that writes to code in the virtual page 'addr'
@@ -216,36 +438,84 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
{
return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
}
void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
/*
* Dirty write flag handling
*
* When the TCG code writes to a location it looks up the address in
* the TLB and uses that data to compute the final address. If any of
* the lower bits of the address are set then the slow path is forced.
* There are a number of reasons to do this but for normal RAM the
* most usual is detecting writes to code regions which may invalidate
* generated code.
*
* Because we want other vCPUs to respond to changes straight away we
* update the te->addr_write field atomically. If the TLB entry has
* been changed by the vCPU in the mean time we skip the update.
*
* As this function uses atomic accesses we also need to ensure
* updates to tlb_entries follow the same access rules. We don't need
* to worry about this for oversized guests as MTTCG is disabled for
* them.
*/
static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
uintptr_t length)
{
uintptr_t addr;
#if TCG_OVERSIZED_GUEST
uintptr_t addr = tlb_entry->addr_write;
if (tlb_is_dirty_ram(tlb_entry)) {
addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
addr &= TARGET_PAGE_MASK;
addr += tlb_entry->addend;
if ((addr - start) < length) {
tlb_entry->addr_write |= TLB_NOTDIRTY;
}
}
}
#else
/* paired with atomic_mb_set in tlb_set_page_with_attrs */
uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
uintptr_t addr = orig_addr;
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
ram_addr = qemu_ram_addr_from_host(ptr);
if (ram_addr == RAM_ADDR_INVALID) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
addr &= TARGET_PAGE_MASK;
addr += atomic_read(&tlb_entry->addend);
if ((addr - start) < length) {
uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
}
}
return ram_addr;
#endif
}
/* For atomic correctness when running MTTCG we need to use the right
* primitives when copying entries */
static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
bool atomic_set)
{
#if TCG_OVERSIZED_GUEST
*d = *s;
#else
if (atomic_set) {
d->addr_read = s->addr_read;
d->addr_code = s->addr_code;
atomic_set(&d->addend, atomic_read(&s->addend));
/* Pairs with flag setting in tlb_reset_dirty_range */
atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
} else {
d->addr_read = s->addr_read;
d->addr_write = atomic_read(&s->addr_write);
d->addr_code = s->addr_code;
d->addend = atomic_read(&s->addend);
}
#endif
}
/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
* the target vCPU). As such care needs to be taken that we don't
* dangerously race with another vCPU update. The only thing actually
* updated is the target TLB entry ->addr_write flags.
*/
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
CPUArchState *env;
@@ -283,6 +553,8 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
vaddr &= TARGET_PAGE_MASK;
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -337,11 +609,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
target_ulong address;
target_ulong code_address;
uintptr_t addend;
CPUTLBEntry *te;
CPUTLBEntry *te, *tv, tn;
hwaddr iotlb, xlat, sz;
unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
int asidx = cpu_asidx_from_attrs(cpu, attrs);
assert_cpu_is_self(cpu);
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
@@ -371,41 +644,50 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
te = &env->tlb_table[mmu_idx][index];
/* do not discard the translation in te, evict it into a victim tlb */
env->tlb_v_table[mmu_idx][vidx] = *te;
tv = &env->tlb_v_table[mmu_idx][vidx];
/* addr_write can race with tlb_reset_dirty_range */
copy_tlb_helper(tv, te, true);
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
/* refill the tlb */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
env->iotlb[mmu_idx][index].attrs = attrs;
te->addend = addend - vaddr;
/* Now calculate the new entry */
tn.addend = addend - vaddr;
if (prot & PAGE_READ) {
te->addr_read = address;
tn.addr_read = address;
} else {
te->addr_read = -1;
tn.addr_read = -1;
}
if (prot & PAGE_EXEC) {
te->addr_code = code_address;
tn.addr_code = code_address;
} else {
te->addr_code = -1;
tn.addr_code = -1;
}
tn.addr_write = -1;
if (prot & PAGE_WRITE) {
if ((memory_region_is_ram(section->mr) && section->readonly)
|| memory_region_is_romd(section->mr)) {
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
tn.addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& cpu_physical_memory_is_clean(
memory_region_get_ram_addr(section->mr) + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
tn.addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
tn.addr_write = address;
}
} else {
te->addr_write = -1;
}
/* Pairs with flag setting in tlb_reset_dirty_range */
copy_tlb_helper(te, &tn, true);
/* atomic_mb_set(&te->addr_write, write_address); */
}
/* Add a new TLB entry, but without specifying the memory
@@ -452,6 +734,18 @@ static void report_bad_exec(CPUState *cpu, target_ulong addr)
log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
}
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
ram_addr = qemu_ram_addr_from_host(ptr);
if (ram_addr == RAM_ADDR_INVALID) {
error_report("Bad ram pointer %p", ptr);
abort();
}
return ram_addr;
}
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
* is actually a ram_addr_t (in system mode; the user mode emulation
@@ -475,14 +769,13 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
if (memory_region_is_unassigned(mr)) {
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->do_unassigned_access) {
cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
} else {
report_bad_exec(cpu, addr);
exit(1);
}
cpu_unassigned_access(cpu, addr, false, true, 0, 4);
/* The CPU's unassigned access hook might have longjumped out
* with an exception. If it didn't (or there was no hook) then
* we can't proceed further.
*/
report_bad_exec(cpu, addr);
exit(1);
}
p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
return qemu_ram_addr_from_host_nofail(p);
@@ -495,6 +788,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
uint64_t val;
bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
@@ -503,7 +797,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
cpu->mem_io_vaddr = addr;
if (mr->global_locking) {
qemu_mutex_lock_iothread();
locked = true;
}
memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
return val;
}
@@ -514,15 +817,23 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
if (mr->global_locking) {
qemu_mutex_lock_iothread();
locked = true;
}
memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
}
/* Return true if ADDR is present in the victim tlb, and has been copied
@@ -538,10 +849,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
copy_tlb_helper(&tmptlb, tlb, false);
copy_tlb_helper(tlb, vtlb, true);
copy_tlb_helper(vtlb, &tmptlb, true);
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
tmpio = *io; *io = *vio; *vio = tmpio;
return true;
}

View File

@@ -63,18 +63,14 @@ static bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = {
size_t qcrypto_cipher_get_block_len(QCryptoCipherAlgorithm alg)
{
if (alg >= G_N_ELEMENTS(alg_key_len)) {
return 0;
}
assert(alg < G_N_ELEMENTS(alg_key_len));
return alg_block_len[alg];
}
size_t qcrypto_cipher_get_key_len(QCryptoCipherAlgorithm alg)
{
if (alg >= G_N_ELEMENTS(alg_key_len)) {
return 0;
}
assert(alg < G_N_ELEMENTS(alg_key_len));
return alg_key_len[alg];
}

View File

@@ -48,6 +48,7 @@ static int qcrypto_ivgen_essiv_init(QCryptoIVGen *ivgen,
&salt, &nhash,
errp) < 0) {
g_free(essiv);
g_free(salt);
return -1;
}

View File

@@ -10,3 +10,6 @@ CONFIG_JAZZ=y
CONFIG_G364FB=y
CONFIG_JAZZ_LED=y
CONFIG_VT82C686=y
CONFIG_MIPS_BOSTON=y
CONFIG_FITLOADER=y
CONFIG_PCI_XILINX=y

View File

@@ -166,8 +166,10 @@ static void dma_blk_cb(void *opaque, int ret)
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
aio_context_release(dbs->ctx);
assert(dbs->acb);
}

350
docs/multi-thread-tcg.txt Normal file
View File

@@ -0,0 +1,350 @@
Copyright (c) 2015-2016 Linaro Ltd.
This work is licensed under the terms of the GNU GPL, version 2 or
later. See the COPYING file in the top-level directory.
Introduction
============
This document outlines the design for multi-threaded TCG system-mode
emulation. The current user-mode emulation mirrors the thread
structure of the translated executable. Some of the work will be
applicable to both system and linux-user emulation.
The original system-mode TCG implementation was single threaded and
dealt with multiple CPUs with simple round-robin scheduling. This
simplified a lot of things but became increasingly limited as systems
being emulated gained additional cores and per-core performance gains
for host systems started to level off.
vCPU Scheduling
===============
We introduce a new running mode where each vCPU will run on its own
user-space thread. This will be enabled by default for all FE/BE
combinations that have had the required work done to support this
safely.
In the general case of running translated code there should be no
inter-vCPU dependencies and all vCPUs should be able to run at full
speed. Synchronisation will only be required while accessing internal
shared data structures or when the emulated architecture requires a
coherent representation of the emulated machine state.
Shared Data Structures
======================
Main Run Loop
-------------
Even when there is no code being generated there are a number of
structures associated with the hot-path through the main run-loop.
These are associated with looking up the next translation block to
execute. These include:
tb_jmp_cache (per-vCPU, cache of recent jumps)
tb_ctx.htable (global hash table, phys address->tb lookup)
As TB linking only occurs when blocks are in the same page this code
is critical to performance as looking up the next TB to execute is the
most common reason to exit the generated code.
DESIGN REQUIREMENT: Make access to lookup structures safe with
multiple reader/writer threads. Minimise any lock contention to do it.
The hot-path avoids using locks where possible. The tb_jmp_cache is
updated with atomic accesses to ensure consistent results. The fall
back QHT based hash table is also designed for lockless lookups. Locks
are only taken when code generation is required or TranslationBlocks
have their block-to-block jumps patched.
Global TCG State
----------------
We need to protect the entire code generation cycle including any post
generation patching of the translated code. This also implies a shared
translation buffer which contains code running on all cores. Any
execution path that comes to the main run loop will need to hold a
mutex for code generation. This also includes times when we need flush
code or entries from any shared lookups/caches. Structures held on a
per-vCPU basis won't need locking unless other vCPUs will need to
modify them.
DESIGN REQUIREMENT: Add locking around all code generation and TB
patching.
(Current solution)
Mainly as part of the linux-user work all code generation is
serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the
place of mmap_lock() in linux-user.
Translation Blocks
------------------
Currently the whole system shares a single code generation buffer
which when full will force a flush of all translations and start from
scratch again. Some operations also force a full flush of translations
including:
- debugging operations (breakpoint insertion/removal)
- some CPU helper functions
This is done with the async_safe_run_on_cpu() mechanism to ensure all
vCPUs are quiescent when changes are being made to shared global
structures.
More granular translation invalidation events are typically due
to a change of the state of a physical page:
- code modification (self modify code, patching code)
- page changes (new page mapping in linux-user mode)
While setting the invalid flag in a TranslationBlock will stop it
being used when looked up in the hot-path there are a number of other
book-keeping structures that need to be safely cleared.
Any TranslationBlocks which have been patched to jump directly to the
now invalid blocks need the jump patches reversing so they will return
to the C code.
There are a number of look-up caches that need to be properly updated
including the:
- jump lookup cache
- the physical-to-tb lookup hash table
- the global page table
The global page table (l1_map) which provides a multi-level look-up
for PageDesc structures which contain pointers to the start of a
linked list of all Translation Blocks in that page (see page_next).
Both the jump patching and the page cache involve linked lists that
the invalidated TranslationBlock needs to be removed from.
DESIGN REQUIREMENT: Safely handle invalidation of TBs
- safely patch/revert direct jumps
- remove central PageDesc lookup entries
- ensure lookup caches/hashes are safely updated
(Current solution)
The direct jump themselves are updated atomically by the TCG
tb_set_jmp_target() code. Modification to the linked lists that allow
searching for linked pages are done under the protect of the
tb_lock().
The global page table is protected by the tb_lock() in system-mode and
mmap_lock() in linux-user mode.
The lookup caches are updated atomically and the lookup hash uses QHT
which is designed for concurrent safe lookup.
Memory maps and TLBs
--------------------
The memory handling code is fairly critical to the speed of memory
access in the emulated system. The SoftMMU code is designed so the
hot-path can be handled entirely within translated code. This is
handled with a per-vCPU TLB structure which once populated will allow
a series of accesses to the page to occur without exiting the
translated code. It is possible to set flags in the TLB address which
will ensure the slow-path is taken for each access. This can be done
to support:
- Memory regions (dividing up access to PIO, MMIO and RAM)
- Dirty page tracking (for code gen, SMC detection, migration and display)
- Virtual TLB (for translating guest address->real address)
When the TLB tables are updated by a vCPU thread other than their own
we need to ensure it is done in a safe way so no inconsistent state is
seen by the vCPU thread.
Some operations require updating a number of vCPUs TLBs at the same
time in a synchronised manner.
DESIGN REQUIREMENTS:
- TLB Flush All/Page
- can be across-vCPUs
- cross vCPU TLB flush may need other vCPU brought to halt
- change may need to be visible to the calling vCPU immediately
- TLB Flag Update
- usually cross-vCPU
- want change to be visible as soon as possible
- TLB Update (update a CPUTLBEntry, via tlb_set_page_with_attrs)
- This is a per-vCPU table - by definition can't race
- updated by its own thread when the slow-path is forced
(Current solution)
We have updated cputlb.c to defer operations when a cross-vCPU
operation with async_run_on_cpu() which ensures each vCPU sees a
coherent state when it next runs its work (in a few instructions
time).
A new set up operations (tlb_flush_*_all_cpus) take an additional flag
which when set will force synchronisation by setting the source vCPUs
work as "safe work" and exiting the cpu run loop. This ensure by the
time execution restarts all flush operations have completed.
TLB flag updates are all done atomically and are also protected by the
tb_lock() which is used by the functions that update the TLB in bulk.
(Known limitation)
Not really a limitation but the wait mechanism is overly strict for
some architectures which only need flushes completed by a barrier
instruction. This could be a future optimisation.
Emulated hardware state
-----------------------
Currently thanks to KVM work any access to IO memory is automatically
protected by the global iothread mutex, also known as the BQL (Big
Qemu Lock). Any IO region that doesn't use global mutex is expected to
do its own locking.
However IO memory isn't the only way emulated hardware state can be
modified. Some architectures have model specific registers that
trigger hardware emulation features. Generally any translation helper
that needs to update more than a single vCPUs of state should take the
BQL.
As the BQL, or global iothread mutex is shared across the system we
push the use of the lock as far down into the TCG code as possible to
minimise contention.
(Current solution)
MMIO access automatically serialises hardware emulation by way of the
BQL. Currently ARM targets serialise all ARM_CP_IO register accesses
and also defer the reset/startup of vCPUs to the vCPU context by way
of async_run_on_cpu().
Updates to interrupt state are also protected by the BQL as they can
often be cross vCPU.
Memory Consistency
==================
Between emulated guests and host systems there are a range of memory
consistency models. Even emulating weakly ordered systems on strongly
ordered hosts needs to ensure things like store-after-load re-ordering
can be prevented when the guest wants to.
Memory Barriers
---------------
Barriers (sometimes known as fences) provide a mechanism for software
to enforce a particular ordering of memory operations from the point
of view of external observers (e.g. another processor core). They can
apply to any memory operations as well as just loads or stores.
The Linux kernel has an excellent write-up on the various forms of
memory barrier and the guarantees they can provide [1].
Barriers are often wrapped around synchronisation primitives to
provide explicit memory ordering semantics. However they can be used
by themselves to provide safe lockless access by ensuring for example
a change to a signal flag will only be visible once the changes to
payload are.
DESIGN REQUIREMENT: Add a new tcg_memory_barrier op
This would enforce a strong load/store ordering so all loads/stores
complete at the memory barrier. On single-core non-SMP strongly
ordered backends this could become a NOP.
Aside from explicit standalone memory barrier instructions there are
also implicit memory ordering semantics which comes with each guest
memory access instruction. For example all x86 load/stores come with
fairly strong guarantees of sequential consistency where as ARM has
special variants of load/store instructions that imply acquire/release
semantics.
In the case of a strongly ordered guest architecture being emulated on
a weakly ordered host the scope for a heavy performance impact is
quite high.
DESIGN REQUIREMENTS: Be efficient with use of memory barriers
- host systems with stronger implied guarantees can skip some barriers
- merge consecutive barriers to the strongest one
(Current solution)
The system currently has a tcg_gen_mb() which will add memory barrier
operations if code generation is being done in a parallel context. The
tcg_optimize() function attempts to merge barriers up to their
strongest form before any load/store operations. The solution was
originally developed and tested for linux-user based systems. All
backends have been converted to emit fences when required. So far the
following front-ends have been updated to emit fences when required:
- target-i386
- target-arm
- target-aarch64
- target-alpha
- target-mips
Memory Control and Maintenance
------------------------------
This includes a class of instructions for controlling system cache
behaviour. While QEMU doesn't model cache behaviour these instructions
are often seen when code modification has taken place to ensure the
changes take effect.
Synchronisation Primitives
--------------------------
There are two broad types of synchronisation primitives found in
modern ISAs: atomic instructions and exclusive regions.
The first type offer a simple atomic instruction which will guarantee
some sort of test and conditional store will be truly atomic w.r.t.
other cores sharing access to the memory. The classic example is the
x86 cmpxchg instruction.
The second type offer a pair of load/store instructions which offer a
guarantee that an region of memory has not been touched between the
load and store instructions. An example of this is ARM's ldrex/strex
pair where the strex instruction will return a flag indicating a
successful store only if no other CPU has accessed the memory region
since the ldrex.
Traditionally TCG has generated a series of operations that work
because they are within the context of a single translation block so
will have completed before another CPU is scheduled. However with
the ability to have multiple threads running to emulate multiple CPUs
we will need to explicitly expose these semantics.
DESIGN REQUIREMENTS:
- Support classic atomic instructions
- Support load/store exclusive (or load link/store conditional) pairs
- Generic enough infrastructure to support all guest architectures
CURRENT OPEN QUESTIONS:
- How problematic is the ABA problem in general?
(Current solution)
The TCG provides a number of atomic helpers (tcg_gen_atomic_*) which
can be used directly or combined to emulate other instructions like
ARM's ldrex/strex instructions. While they are susceptible to the ABA
problem so far common guests have not implemented patterns where
this may be a problem - typically presenting a locking ABI which
assumes cmpxchg like semantics.
The code also includes a fall-back for cases where multi-threaded TCG
ops can't work (e.g. guest atomic width > host atomic width). In this
case an EXCP_ATOMIC exit occurs and the instruction is emulated with
an exclusive lock which ensures all emulation is serialised.
While the atomic helpers look good enough for now there may be a need
to look at solutions that can more closely model the guest
architectures semantics.
==========
[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/Documentation/memory-barriers.txt

124
docs/nvdimm.txt Normal file
View File

@@ -0,0 +1,124 @@
QEMU Virtual NVDIMM
===================
This document explains the usage of virtual NVDIMM (vNVDIMM) feature
which is available since QEMU v2.6.0.
The current QEMU only implements the persistent memory mode of vNVDIMM
device and not the block window mode.
Basic Usage
-----------
The storage of a vNVDIMM device in QEMU is provided by the memory
backend (i.e. memory-backend-file and memory-backend-ram). A simple
way to create a vNVDIMM device at startup time is done via the
following command line options:
-machine pc,nvdimm
-m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE
-object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE
-device nvdimm,id=nvdimm1,memdev=mem1
Where,
- the "nvdimm" machine option enables vNVDIMM feature.
- "slots=$N" should be equal to or larger than the total amount of
normal RAM devices and vNVDIMM devices, e.g. $N should be >= 2 here.
- "maxmem=$MAX_SIZE" should be equal to or larger than the total size
of normal RAM devices and vNVDIMM devices, e.g. $MAX_SIZE should be
>= $RAM_SIZE + $NVDIMM_SIZE here.
- "object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE"
creates a backend storage of size $NVDIMM_SIZE on a file $PATH. All
accesses to the virtual NVDIMM device go to the file $PATH.
"share=on/off" controls the visibility of guest writes. If
"share=on", then guest writes will be applied to the backend
file. If another guest uses the same backend file with option
"share=on", then above writes will be visible to it as well. If
"share=off", then guest writes won't be applied to the backend
file and thus will be invisible to other guests.
- "device nvdimm,id=nvdimm1,memdev=mem1" creates a virtual NVDIMM
device whose storage is provided by above memory backend device.
Multiple vNVDIMM devices can be created if multiple pairs of "-object"
and "-device" are provided.
For above command line options, if the guest OS has the proper NVDIMM
driver, it should be able to detect a NVDIMM device which is in the
persistent memory mode and whose size is $NVDIMM_SIZE.
Note:
1. Prior to QEMU v2.8.0, if memory-backend-file is used and the actual
backend file size is not equal to the size given by "size" option,
QEMU will truncate the backend file by ftruncate(2), which will
corrupt the existing data in the backend file, especially for the
shrink case.
QEMU v2.8.0 and later check the backend file size and the "size"
option. If they do not match, QEMU will report errors and abort in
order to avoid the data corruption.
2. QEMU v2.6.0 only puts a basic alignment requirement on the "size"
option of memory-backend-file, e.g. 4KB alignment on x86. However,
QEMU v.2.7.0 puts an additional alignment requirement, which may
require a larger value than the basic one, e.g. 2MB on x86. This
change breaks the usage of memory-backend-file that only satisfies
the basic alignment.
QEMU v2.8.0 and later remove the additional alignment on non-s390x
architectures, so the broken memory-backend-file can work again.
Label
-----
QEMU v2.7.0 and later implement the label support for vNVDIMM devices.
To enable label on vNVDIMM devices, users can simply add
"label-size=$SZ" option to "-device nvdimm", e.g.
-device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K
Note:
1. The minimal label size is 128KB.
2. QEMU v2.7.0 and later store labels at the end of backend storage.
If a memory backend file, which was previously used as the backend
of a vNVDIMM device without labels, is now used for a vNVDIMM
device with label, the data in the label area at the end of file
will be inaccessible to the guest. If any useful data (e.g. the
meta-data of the file system) was stored there, the latter usage
may result guest data corruption (e.g. breakage of guest file
system).
Hotplug
-------
QEMU v2.8.0 and later implement the hotplug support for vNVDIMM
devices. Similarly to the RAM hotplug, the vNVDIMM hotplug is
accomplished by two monitor commands "object_add" and "device_add".
For example, the following commands add another 4GB vNVDIMM device to
the guest:
(qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=new_nvdimm.img,size=4G
(qemu) device_add nvdimm,id=nvdimm2,memdev=mem2
Note:
1. Each hotplugged vNVDIMM device consumes one memory slot. Users
should always ensure the memory option "-m ...,slots=N" specifies
enough number of slots, i.e.
N >= number of RAM devices +
number of statically plugged vNVDIMM devices +
number of hotplugged vNVDIMM devices
2. The similar is required for the memory option "-m ...,maxmem=M", i.e.
M >= size of RAM devices +
size of statically plugged vNVDIMM devices +
size of hotplugged vNVDIMM devices

View File

@@ -1,6 +1,8 @@
\input texinfo
@setfilename qemu-ga-ref.info
@include version.texi
@exampleindent 0
@paragraphindent 0

View File

@@ -1,6 +1,8 @@
\input texinfo
@setfilename qemu-qmp-ref.info
@include version.texi
@exampleindent 0
@paragraphindent 0

View File

@@ -225,3 +225,10 @@ recording the virtual machine this filter puts all packets coming from
the outer world into the log. In replay mode packets from the log are
injected into the network device. All interactions with network backend
in replay mode are disabled.
Audio devices
-------------
Audio data is recorded and replay automatically. The command line for recording
and replaying must contain identical specifications of audio hardware, e.g.:
-soundhw ac97

View File

@@ -61,6 +61,7 @@ PCI devices (other than virtio):
1b36:0009 PCI Expander Bridge (-device pxb)
1b36:000a PCI-PCI bridge (multiseat)
1b36:000b PCIe Expander Bridge (-device pxb-pcie)
1b36:000d PCI xhci usb host adapter
All these devices are documented in docs/specs.

2
dtc

Submodule dtc updated: 65cc4d2748...ec02b34c05

13
exec.c
View File

@@ -2134,9 +2134,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
}
cpu->watchpoint_hit = wp;
/* The tb_lock will be reset when cpu_loop_exit or
* cpu_loop_exit_noexc longjmp back into the cpu_exec
* main loop.
/* Both tb_lock and iothread_mutex will be reset when
* cpu_loop_exit or cpu_loop_exit_noexc longjmp
* back into the cpu_exec main loop.
*/
tb_lock();
tb_check_watchpoint(cpu);
@@ -2371,8 +2371,14 @@ static void io_mem_init(void)
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
/* io_mem_notdirty calls tb_invalidate_phys_page_fast,
* which can be called without the iothread mutex.
*/
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
memory_region_clear_global_locking(&io_mem_notdirty);
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
}
@@ -3166,6 +3172,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache)
xen_invalidate_map_cache_entry(cache->ptr);
}
memory_region_unref(cache->mr);
cache->mr = NULL;
}
/* Called from RCU critical section. This function has the same

View File

@@ -623,6 +623,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
case float_round_down:
roundIncrement = zSign ? 0x3ff : 0;
break;
case float_round_to_odd:
roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
break;
default:
abort();
}
@@ -632,8 +635,10 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
|| ( ( zExp == 0x7FD )
&& ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
) {
bool overflow_to_inf = roundingMode != float_round_to_odd &&
roundIncrement != 0;
float_raise(float_flag_overflow | float_flag_inexact, status);
return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
}
if ( zExp < 0 ) {
if (status->flush_to_zero) {
@@ -651,6 +656,13 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
if (isTiny && roundBits) {
float_raise(float_flag_underflow, status);
}
if (roundingMode == float_round_to_odd) {
/*
* For round-to-odd case, the roundIncrement depends on
* zSig which just changed.
*/
roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
}
}
}
if (roundBits) {
@@ -1149,6 +1161,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
case float_round_down:
increment = zSign && zSig2;
break;
case float_round_to_odd:
increment = !(zSig1 & 0x1) && zSig2;
break;
default:
abort();
}
@@ -1168,6 +1183,7 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
if ( ( roundingMode == float_round_to_zero )
|| ( zSign && ( roundingMode == float_round_up ) )
|| ( ! zSign && ( roundingMode == float_round_down ) )
|| (roundingMode == float_round_to_odd)
) {
return
packFloat128(
@@ -1215,6 +1231,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
case float_round_down:
increment = zSign && zSig2;
break;
case float_round_to_odd:
increment = !(zSig1 & 0x1) && zSig2;
break;
default:
abort();
}
@@ -6108,6 +6127,93 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point value
| `a' to the 64-bit unsigned integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic---which means in particular that the conversion is rounded
| according to the current rounding mode. If `a' is a NaN, the largest
| positive integer is returned. If the conversion overflows, the
| largest unsigned integer is returned. If 'a' is negative, the value is
| rounded and zero is returned; negative values that do not round to zero
| will raise the inexact exception.
*----------------------------------------------------------------------------*/
uint64_t float128_to_uint64(float128 a, float_status *status)
{
flag aSign;
int aExp;
int shiftCount;
uint64_t aSig0, aSig1;
aSig0 = extractFloat128Frac0(a);
aSig1 = extractFloat128Frac1(a);
aExp = extractFloat128Exp(a);
aSign = extractFloat128Sign(a);
if (aSign && (aExp > 0x3FFE)) {
float_raise(float_flag_invalid, status);
if (float128_is_any_nan(a)) {
return LIT64(0xFFFFFFFFFFFFFFFF);
} else {
return 0;
}
}
if (aExp) {
aSig0 |= LIT64(0x0001000000000000);
}
shiftCount = 0x402F - aExp;
if (shiftCount <= 0) {
if (0x403E < aExp) {
float_raise(float_flag_invalid, status);
return LIT64(0xFFFFFFFFFFFFFFFF);
}
shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
} else {
shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
}
return roundAndPackUint64(aSign, aSig0, aSig1, status);
}
uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
{
uint64_t v;
signed char current_rounding_mode = status->float_rounding_mode;
set_float_rounding_mode(float_round_to_zero, status);
v = float128_to_uint64(a, status);
set_float_rounding_mode(current_rounding_mode, status);
return v;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the 32-bit unsigned integer format. The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic except that the conversion is always rounded toward zero.
| If `a' is a NaN, the largest positive integer is returned. Otherwise,
| if the conversion overflows, the largest unsigned integer is returned.
| If 'a' is negative, the value is rounded and zero is returned; negative
| values that do not round to zero will raise the inexact exception.
*----------------------------------------------------------------------------*/
uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
{
uint64_t v;
uint32_t res;
int old_exc_flags = get_float_exception_flags(status);
v = float128_to_uint64_round_to_zero(a, status);
if (v > 0xffffffff) {
res = 0xffffffff;
} else {
return v;
}
set_float_exception_flags(old_exc_flags, status);
float_raise(float_flag_invalid, status);
return res;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the single-precision floating-point format. The conversion
@@ -7386,7 +7492,7 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
{
signed char current_rounding_mode = status->float_rounding_mode;
set_float_rounding_mode(float_round_to_zero, status);
int64_t v = float64_to_uint64(a, status);
uint64_t v = float64_to_uint64(a, status);
set_float_rounding_mode(current_rounding_mode, status);
return v;
}

View File

@@ -5,7 +5,7 @@ common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o
else
common-obj-y = qemu-fsdev-dummy.o
endif
common-obj-y += qemu-fsdev-opts.o
common-obj-y += qemu-fsdev-opts.o qemu-fsdev-throttle.o
# Toplevel always builds this; targets without virtio will put it in
# common-obj-y

View File

@@ -17,6 +17,7 @@
#include <dirent.h>
#include <utime.h>
#include <sys/vfs.h>
#include "qemu-fsdev-throttle.h"
#define SM_LOCAL_MODE_BITS 0600
#define SM_LOCAL_DIR_MODE_BITS 0700
@@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
char *path;
int export_flags;
FileOperations *ops;
FsThrottle fst;
} FsDriverEntry;
typedef struct FsContext
@@ -83,6 +85,7 @@ typedef struct FsContext
int export_flags;
struct xattr_operations **xops;
struct extended_ops exops;
FsThrottle *fst;
/* fs driver specific data */
void *private;
} FsContext;

View File

@@ -9,6 +9,7 @@
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/module.h"
#include "qemu/throttle-options.h"
static QemuOptsList qemu_fsdev_opts = {
.name = "fsdev",
@@ -39,6 +40,8 @@ static QemuOptsList qemu_fsdev_opts = {
.type = QEMU_OPT_NUMBER,
},
THROTTLE_OPTS,
{ /*End of list */ }
},
};

118
fsdev/qemu-fsdev-throttle.c Normal file
View File

@@ -0,0 +1,118 @@
/*
* Fsdev Throttle
*
* Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
*
* Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*
* See the COPYING file in the top-level directory for details.
*
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu-fsdev-throttle.h"
#include "qemu/iov.h"
static void fsdev_throttle_read_timer_cb(void *opaque)
{
FsThrottle *fst = opaque;
qemu_co_enter_next(&fst->throttled_reqs[false]);
}
static void fsdev_throttle_write_timer_cb(void *opaque)
{
FsThrottle *fst = opaque;
qemu_co_enter_next(&fst->throttled_reqs[true]);
}
void fsdev_throttle_parse_opts(QemuOpts *opts, FsThrottle *fst, Error **errp)
{
throttle_config_init(&fst->cfg);
fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.bps-total", 0);
fst->cfg.buckets[THROTTLE_BPS_READ].avg =
qemu_opt_get_number(opts, "throttling.bps-read", 0);
fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
qemu_opt_get_number(opts, "throttling.bps-write", 0);
fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.iops-total", 0);
fst->cfg.buckets[THROTTLE_OPS_READ].avg =
qemu_opt_get_number(opts, "throttling.iops-read", 0);
fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
qemu_opt_get_number(opts, "throttling.iops-write", 0);
fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
fst->cfg.buckets[THROTTLE_BPS_READ].max =
qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
fst->cfg.buckets[THROTTLE_OPS_READ].max =
qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
fst->cfg.buckets[THROTTLE_BPS_READ].burst_length =
qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
fst->cfg.op_size =
qemu_opt_get_number(opts, "throttling.iops-size", 0);
throttle_is_valid(&fst->cfg, errp);
}
void fsdev_throttle_init(FsThrottle *fst)
{
if (throttle_enabled(&fst->cfg)) {
throttle_init(&fst->ts);
throttle_timers_init(&fst->tt,
qemu_get_aio_context(),
QEMU_CLOCK_REALTIME,
fsdev_throttle_read_timer_cb,
fsdev_throttle_write_timer_cb,
fst);
throttle_config(&fst->ts, &fst->tt, &fst->cfg);
qemu_co_queue_init(&fst->throttled_reqs[0]);
qemu_co_queue_init(&fst->throttled_reqs[1]);
}
}
void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
struct iovec *iov, int iovcnt)
{
if (throttle_enabled(&fst->cfg)) {
if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) ||
!qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
}
throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt));
if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
!throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) {
qemu_co_queue_next(&fst->throttled_reqs[is_write]);
}
}
}
void fsdev_throttle_cleanup(FsThrottle *fst)
{
if (throttle_enabled(&fst->cfg)) {
throttle_timers_destroy(&fst->tt);
}
}

View File

@@ -0,0 +1,39 @@
/*
* Fsdev Throttle
*
* Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
*
* Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*
* See the COPYING file in the top-level directory for details.
*
*/
#ifndef _FSDEV_THROTTLE_H
#define _FSDEV_THROTTLE_H
#include "block/aio.h"
#include "qemu/main-loop.h"
#include "qemu/coroutine.h"
#include "qapi/error.h"
#include "qemu/throttle.h"
typedef struct FsThrottle {
ThrottleState ts;
ThrottleTimers tt;
ThrottleConfig cfg;
CoQueue throttled_reqs[2];
} FsThrottle;
void fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **);
void fsdev_throttle_init(FsThrottle *);
void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool ,
struct iovec *, int);
void fsdev_throttle_cleanup(FsThrottle *);
#endif /* _FSDEV_THROTTLE_H */

29
hmp.c
View File

@@ -1014,8 +1014,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
const char *filename = qdict_get_str(qdict, "filename");
uint64_t addr = qdict_get_int(qdict, "val");
Error *err = NULL;
int cpu_index = monitor_get_cpu_index();
qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err);
if (cpu_index < 0) {
monitor_printf(mon, "No CPU available\n");
return;
}
qmp_memsave(addr, size, filename, true, cpu_index, &err);
hmp_handle_error(mon, &err);
}
@@ -1338,12 +1344,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
{
const char *param = qdict_get_str(qdict, "parameter");
const char *valuestr = qdict_get_str(qdict, "value");
int64_t valuebw = 0;
uint64_t valuebw = 0;
long valueint = 0;
char *endp;
Error *err = NULL;
bool use_int_value = false;
int i;
int i, ret;
for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) {
if (strcmp(param, MigrationParameter_lookup[i]) == 0) {
@@ -1379,9 +1384,9 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
break;
case MIGRATION_PARAMETER_MAX_BANDWIDTH:
p.has_max_bandwidth = true;
valuebw = qemu_strtosz(valuestr, &endp);
if (valuebw < 0 || (size_t)valuebw != valuebw
|| *endp != '\0') {
ret = qemu_strtosz_MiB(valuestr, NULL, &valuebw);
if (ret < 0 || valuebw > INT64_MAX
|| (size_t)valuebw != valuebw) {
error_setg(&err, "Invalid size %s", valuestr);
goto cleanup;
}
@@ -1552,6 +1557,7 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
BlockIOThrottle throttle = {
.has_device = true,
.device = (char *) qdict_get_str(qdict, "device"),
.bps = qdict_get_int(qdict, "bps"),
.bps_rd = qdict_get_int(qdict, "bps_rd"),
@@ -2148,10 +2154,15 @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
IOThreadInfoList *info;
IOThreadInfo *value;
for (info = info_list; info; info = info->next) {
monitor_printf(mon, "%s: thread_id=%" PRId64 "\n",
info->value->id, info->value->thread_id);
value = info->value;
monitor_printf(mon, "%s:\n", value->id);
monitor_printf(mon, " thread_id=%" PRId64 "\n", value->thread_id);
monitor_printf(mon, " poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
monitor_printf(mon, " poll-grow=%" PRId64 "\n", value->poll_grow);
monitor_printf(mon, " poll-shrink=%" PRId64 "\n", value->poll_shrink);
}
qapi_free_IOThreadInfoList(info_list);

View File

@@ -1208,6 +1208,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
{
const char *sec_model = qemu_opt_get(opts, "security_model");
const char *path = qemu_opt_get(opts, "path");
Error *err = NULL;
if (!sec_model) {
error_report("Security model not specified, local fs needs security model");
@@ -1236,6 +1237,13 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
error_report("fsdev: No path specified");
return -1;
}
fsdev_throttle_parse_opts(opts, &fse->fst, &err);
if (err) {
error_reportf_err(err, "Throttle configuration is not valid: ");
return -1;
}
fse->path = g_strdup(path);
return 0;

View File

@@ -2374,7 +2374,7 @@ static void coroutine_fn v9fs_flush(void *opaque)
/*
* Wait for pdu to complete.
*/
qemu_co_queue_wait(&cancel_pdu->complete);
qemu_co_queue_wait(&cancel_pdu->complete, NULL);
cancel_pdu->cancelled = 0;
pdu_free(cancel_pdu);
}
@@ -3010,7 +3010,6 @@ out_nofid:
*/
static void coroutine_fn v9fs_lock(void *opaque)
{
int8_t status;
V9fsFlock flock;
size_t offset = 7;
struct stat stbuf;
@@ -3018,7 +3017,6 @@ static void coroutine_fn v9fs_lock(void *opaque)
int32_t fid, err = 0;
V9fsPDU *pdu = opaque;
status = P9_LOCK_ERROR;
v9fs_string_init(&flock.client_id);
err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
&flock.flags, &flock.start, &flock.length,
@@ -3044,15 +3042,15 @@ static void coroutine_fn v9fs_lock(void *opaque)
if (err < 0) {
goto out;
}
status = P9_LOCK_SUCCESS;
err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
if (err < 0) {
goto out;
}
err += offset;
trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
out:
put_fid(pdu, fidp);
out_nofid:
err = pdu_marshal(pdu, offset, "b", status);
if (err > 0) {
err += offset;
}
trace_v9fs_lock_return(pdu->tag, pdu->id, status);
pdu_complete(pdu, err);
v9fs_string_free(&flock.client_id);
}
@@ -3531,6 +3529,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
error_setg(errp, "share path %s is not a directory", fse->path);
goto out;
}
s->ctx.fst = &fse->fst;
fsdev_throttle_init(s->ctx.fst);
v9fs_path_free(&path);
rc = 0;
@@ -3551,6 +3553,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
if (s->ops->cleanup) {
s->ops->cleanup(&s->ctx);
}
fsdev_throttle_cleanup(s->ctx.fst);
g_free(s->tag);
g_free(s->ctx.fs_root);
}

View File

@@ -247,6 +247,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
if (v9fs_request_cancelled(pdu)) {
return -EINTR;
}
fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt);
v9fs_co_run_in_worker(
{
err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
@@ -266,6 +267,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
if (v9fs_request_cancelled(pdu)) {
return -EINTR;
}
fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt);
v9fs_co_run_in_worker(
{
err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);

View File

@@ -198,7 +198,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
state->dev_count = id_list->len;
state->devs = g_new0(typeof(*state->devs), state->dev_count);
for (i = 0; i < id_list->len; i++) {
state->devs[i].cpu = id_list->cpus[i].cpu;
state->devs[i].cpu = CPU(id_list->cpus[i].cpu);
state->devs[i].arch_id = id_list->cpus[i].arch_id;
}
memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state,

View File

@@ -49,6 +49,7 @@ static inline void tco_timer_reload(TCOIORegs *tr)
static inline void tco_timer_stop(TCOIORegs *tr)
{
tr->expire_time = -1;
timer_del(tr->tco_timer);
}
static void tco_timer_expired(void *opaque)

View File

@@ -177,6 +177,7 @@ static void clipper_machine_init(MachineClass *mc)
{
mc->desc = "Alpha DP264/CLIPPER";
mc->init = clipper_init;
mc->block_default_type = IF_IDE;
mc->max_cpus = 4;
mc->is_default = 1;
}

View File

@@ -86,6 +86,11 @@ static void bcm2835_peripherals_init(Object *obj)
object_property_add_const_link(OBJECT(&s->property), "dma-mr",
OBJECT(&s->gpu_bus_mr), &error_abort);
/* Random Number Generator */
object_initialize(&s->rng, sizeof(s->rng), TYPE_BCM2835_RNG);
object_property_add_child(obj, "rng", OBJECT(&s->rng), NULL);
qdev_set_parent_bus(DEVICE(&s->rng), sysbus_get_default());
/* Extended Mass Media Controller */
object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI);
object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
@@ -226,6 +231,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
sysbus_connect_irq(SYS_BUS_DEVICE(&s->property), 0,
qdev_get_gpio_in(DEVICE(&s->mboxes), MBOX_CHAN_PROPERTY));
/* Random Number Generator */
object_property_set_bool(OBJECT(&s->rng), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_add_subregion(&s->peri_mr, RNG_OFFSET,
sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rng), 0));
/* Extended Mass Media Controller */
object_property_set_int(OBJECT(&s->sdhci), BCM2835_SDHC_CAPAREG, "capareg",
&err);

View File

@@ -71,6 +71,8 @@ static void cubieboard_init(MachineState *machine)
memory_region_add_subregion(get_system_memory(), AW_A10_SDRAM_BASE,
&s->sdram);
/* TODO create and connect IDE devices for ide_drive_get() */
cubieboard_binfo.ram_size = machine->ram_size;
cubieboard_binfo.kernel_filename = machine->kernel_filename;
cubieboard_binfo.kernel_cmdline = machine->kernel_cmdline;
@@ -82,6 +84,8 @@ static void cubieboard_machine_init(MachineClass *mc)
{
mc->desc = "cubietech cubieboard";
mc->init = cubieboard_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("cubieboard", cubieboard_machine_init)

View File

@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/log.h"
#include "cpu.h"
#include "hw/boards.h"
#include "sysemu/sysemu.h"
@@ -74,6 +75,9 @@
/* PMU SFR base address */
#define EXYNOS4210_PMU_BASE_ADDR 0x10020000
/* Clock controller SFR base address */
#define EXYNOS4210_CLK_BASE_ADDR 0x10030000
/* Display controllers (FIMD) */
#define EXYNOS4210_FIMD0_BASE_ADDR 0x11C00000
@@ -138,6 +142,16 @@ void exynos4210_write_secondary(ARMCPU *cpu,
info->smp_loader_start);
}
static uint64_t exynos4210_calc_affinity(int cpu)
{
uint64_t mp_affinity;
/* Exynos4210 has 0x9 as cluster ID */
mp_affinity = (0x9 << ARM_AFF1_SHIFT) | cpu;
return mp_affinity;
}
Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
unsigned long ram_size)
{
@@ -163,6 +177,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
}
s->cpu[n] = ARM_CPU(cpuobj);
object_property_set_int(cpuobj, exynos4210_calc_affinity(n),
"mp-affinity", &error_abort);
object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR,
"reset-cbar", &error_abort);
object_property_set_bool(cpuobj, true, "realized", &error_fatal);
@@ -297,6 +313,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
*/
sysbus_create_simple("exynos4210.pmu", EXYNOS4210_PMU_BASE_ADDR, NULL);
sysbus_create_simple("exynos4210.clk", EXYNOS4210_CLK_BASE_ADDR, NULL);
/* PWM */
sysbus_create_varargs("exynos4210.pwm", EXYNOS4210_PWM_BASE_ADDR,
s->irq_table[exynos4210_get_irq(22, 0)],

View File

@@ -363,6 +363,8 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, pic[82]);
}
/* TODO create and connect IDE devices for ide_drive_get() */
highbank_binfo.ram_size = ram_size;
highbank_binfo.kernel_filename = kernel_filename;
highbank_binfo.kernel_cmdline = kernel_cmdline;
@@ -405,7 +407,8 @@ static void highbank_class_init(ObjectClass *oc, void *data)
mc->desc = "Calxeda Highbank (ECX-1000)";
mc->init = highbank_init;
mc->block_default_type = IF_SCSI;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
mc->max_cpus = 4;
}
@@ -421,7 +424,8 @@ static void midway_class_init(ObjectClass *oc, void *data)
mc->desc = "Calxeda Midway (ECX-2000)";
mc->init = midway_init;
mc->block_default_type = IF_SCSI;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
mc->max_cpus = 4;
}

View File

@@ -259,7 +259,7 @@ static void realview_init(MachineState *machine,
}
n = drive_get_max_bus(IF_SCSI);
while (n >= 0) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
lsi53c895a_create(pci_bus);
n--;
}
}
@@ -443,7 +443,6 @@ static void realview_pbx_a9_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM RealView Platform Baseboard Explore for Cortex-A9";
mc->init = realview_pbx_a9_init;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 4;
}

View File

@@ -998,6 +998,7 @@ static void spitzpda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3000 (Spitz) PDA (PXA270)";
mc->init = spitz_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo spitzpda_type = {
@@ -1012,6 +1013,7 @@ static void borzoipda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3100 (Borzoi) PDA (PXA270)";
mc->init = borzoi_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo borzoipda_type = {
@@ -1026,6 +1028,7 @@ static void terrierpda_class_init(ObjectClass *oc, void *data)
mc->desc = "Sharp SL-C3200 (Terrier) PDA (PXA270)";
mc->init = terrier_init;
mc->block_default_type = IF_IDE;
}
static const TypeInfo terrierpda_type = {

View File

@@ -263,6 +263,7 @@ static void tosapda_machine_init(MachineClass *mc)
{
mc->desc = "Sharp SL-6000 (Tosa) PDA (PXA255)";
mc->init = tosa_init;
mc->block_default_type = IF_IDE;
}
DEFINE_MACHINE("tosa", tosapda_machine_init)

View File

@@ -290,7 +290,7 @@ static void versatile_init(MachineState *machine, int board_id)
}
n = drive_get_max_bus(IF_SCSI);
while (n >= 0) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
lsi53c895a_create(pci_bus);
n--;
}

View File

@@ -752,7 +752,6 @@ static void vexpress_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM Versatile Express";
mc->init = vexpress_common_init;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 4;
}

View File

@@ -535,7 +535,6 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
static void create_gic(VirtMachineState *vms, qemu_irq *pic)
{
/* We create a standalone GIC */
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
DeviceState *gicdev;
SysBusDevice *gicbusdev;
const char *gictype;
@@ -605,7 +604,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
fdt_add_gic_node(vms);
if (type == 3 && !vmc->no_its) {
if (type == 3 && vms->its) {
create_its(vms, gicdev);
} else if (type == 2) {
create_v2m(vms, pic);
@@ -1378,6 +1377,7 @@ static void machvirt_init(MachineState *machine)
}
object_property_set_bool(cpuobj, true, "realized", NULL);
object_unref(cpuobj);
}
fdt_add_timer_nodes(vms);
fdt_add_cpu_nodes(vms);
@@ -1480,6 +1480,20 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
static bool virt_get_its(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
return vms->its;
}
static void virt_set_its(Object *obj, bool value, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
vms->its = value;
}
static char *virt_get_gic_version(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1540,6 +1554,7 @@ type_init(machvirt_machine_init);
static void virt_2_9_instance_init(Object *obj)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
/* EL3 is disabled by default on virt: this makes us consistent
* between KVM and TCG for this board, and it also allows us to
@@ -1579,6 +1594,19 @@ static void virt_2_9_instance_init(Object *obj)
"Set GIC version. "
"Valid values are 2, 3 and host", NULL);
if (vmc->no_its) {
vms->its = false;
} else {
/* Default allows ITS instantiation */
vms->its = true;
object_property_add_bool(obj, "its", virt_get_its,
virt_set_its, NULL);
object_property_set_description(obj, "its",
"Set on/off to enable/disable "
"ITS instantiation",
NULL);
}
vms->memmap = a15memmap;
vms->irqmap = a15irqmap;
}

View File

@@ -323,7 +323,6 @@ static void zynq_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9";
mc->init = zynq_init;
mc->block_default_type = IF_SCSI;
mc->max_cpus = 1;
mc->no_sdcard = 1;
}

View File

@@ -106,6 +106,8 @@ static void xlnx_ep108_init(MachineState *machine)
sysbus_connect_irq(SYS_BUS_DEVICE(&s->soc.spi[i]), 1, cs_line);
}
/* TODO create and connect IDE devices for ide_drive_get() */
xlnx_ep108_binfo.ram_size = ram_size;
xlnx_ep108_binfo.kernel_filename = machine->kernel_filename;
xlnx_ep108_binfo.kernel_cmdline = machine->kernel_cmdline;
@@ -118,6 +120,8 @@ static void xlnx_ep108_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx ZynqMP EP108 board";
mc->init = xlnx_ep108_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("xlnx-ep108", xlnx_ep108_machine_init)
@@ -126,6 +130,8 @@ static void xlnx_zcu102_machine_init(MachineClass *mc)
{
mc->desc = "Xilinx ZynqMP ZCU102 board";
mc->init = xlnx_ep108_init;
mc->block_default_type = IF_IDE;
mc->units_per_default_bus = 1;
}
DEFINE_MACHINE("xlnx-zcu102", xlnx_zcu102_machine_init)

View File

@@ -147,7 +147,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
g_free(s);
}
static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOBlock *s = (VirtIOBlock *)vdev;
@@ -155,7 +155,7 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
assert(s->dataplane);
assert(s->dataplane_started);
virtio_blk_handle_vq(s, vq);
return virtio_blk_handle_vq(s, vq);
}
/* Context: QEMU global mutex held */

View File

@@ -89,7 +89,9 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
static void virtio_blk_rw_complete(void *opaque, int ret)
{
VirtIOBlockReq *next = opaque;
VirtIOBlock *s = next->dev;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
while (next) {
VirtIOBlockReq *req = next;
next = req->mr_next;
@@ -122,21 +124,27 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
virtio_blk_free_request(req);
}
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
static void virtio_blk_flush_complete(void *opaque, int ret)
{
VirtIOBlockReq *req = opaque;
VirtIOBlock *s = req->dev;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
if (ret) {
if (virtio_blk_handle_rw_error(req, -ret, 0)) {
return;
goto out;
}
}
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
virtio_blk_free_request(req);
out:
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
#ifdef __linux__
@@ -150,7 +158,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
{
VirtIOBlockIoctlReq *ioctl_req = opaque;
VirtIOBlockReq *req = ioctl_req->req;
VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
VirtIOBlock *s = req->dev;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
struct virtio_scsi_inhdr *scsi;
struct sg_io_hdr *hdr;
@@ -182,8 +191,10 @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
out:
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
virtio_blk_req_complete(req, status);
virtio_blk_free_request(req);
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
g_free(ioctl_req);
}
@@ -581,17 +592,20 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
return 0;
}
void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
VirtIOBlockReq *req;
MultiReqBuffer mrb = {};
bool progress = false;
aio_context_acquire(blk_get_aio_context(s->blk));
blk_io_plug(s->blk);
do {
virtio_queue_set_notification(vq, 0);
while ((req = virtio_blk_get_request(s, vq))) {
progress = true;
if (virtio_blk_handle_request(req, &mrb)) {
virtqueue_detach_element(req->vq, &req->elem, 0);
virtio_blk_free_request(req);
@@ -607,6 +621,13 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
}
blk_io_unplug(s->blk);
aio_context_release(blk_get_aio_context(s->blk));
return progress;
}
static void virtio_blk_handle_output_do(VirtIOBlock *s, VirtQueue *vq)
{
virtio_blk_handle_vq(s, vq);
}
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -622,7 +643,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
return;
}
}
virtio_blk_handle_vq(s, vq);
virtio_blk_handle_output_do(s, vq);
}
static void virtio_blk_dma_restart_bh(void *opaque)
@@ -636,6 +657,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
s->rq = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
while (req) {
VirtIOBlockReq *next = req->next;
if (virtio_blk_handle_request(req, &mrb)) {
@@ -656,6 +678,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
if (mrb.num_reqs) {
virtio_blk_submit_multireq(s->blk, &mrb);
}
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
static void virtio_blk_dma_restart_cb(void *opaque, int running,

View File

@@ -13,6 +13,7 @@ common-obj-$(CONFIG_PTIMER) += ptimer.o
common-obj-$(CONFIG_SOFTMMU) += sysbus.o
common-obj-$(CONFIG_SOFTMMU) += machine.o
common-obj-$(CONFIG_SOFTMMU) += loader.o
common-obj-$(CONFIG_FITLOADER) += loader-fit.o
common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
common-obj-$(CONFIG_SOFTMMU) += register.o
common-obj-$(CONFIG_SOFTMMU) += or-irq.o

View File

@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu-common.h"
#include "hw/irq.h"
#include "qom/object.h"

Some files were not shown because too many files have changed in this diff Show More