Compare commits

...

269 Commits

Michael Tokarev
1aff087cee slirp: use less predictable directory name in /tmp for smb config (CVE-2015-4037)
In this version I used mkdtemp(3) which is:

        _BSD_SOURCE
        || /* Since glibc 2.10: */
            (_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)

(POSIX.1-2008), so should be available on systems we care about.

While at it, reset the resulting directory name within the smb structure
on error so the cleanup function won't try to remove a directory which we
failed to create.
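
As an illustration, a minimal sketch of this kind of mkdtemp(3) usage, with the
name cleared on failure so cleanup stays safe (the function and buffer handling
are illustrative, not the actual QEMU code):

    #define _POSIX_C_SOURCE 200809L   /* exposes mkdtemp(3), per the note above */
    #include <stdio.h>
    #include <stdlib.h>

    /* Sketch: create an unpredictable per-instance smb config directory.
     * On failure the name is reset so a later cleanup step won't try to
     * remove a directory that was never created. */
    static int create_smb_dir(char *dir, size_t len)
    {
        snprintf(dir, len, "/tmp/qemu-smb.XXXXXX");
        if (mkdtemp(dir) == NULL) {
            dir[0] = '\0';
            return -1;
        }
        return 0;
    }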

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
(cherry picked from commit 8b8f1c7e9d)
[BR: BSC#932267]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-06-05 15:21:16 -06:00
Petr Matousek
b4f36774b7 pcnet: force the buffer access to be in bounds during tx
4096 is the maximum length per TMD and it is also currently the size of
the relay buffer pcnet driver uses for sending the packet data to QEMU
for further processing. With packet spanning multiple TMDs it can
happen that the overall packet size will be bigger than sizeof(buffer),
which results in memory corruption.

Fix this by only allowing at most sizeof(buffer) bytes to be queued.

This is CVE-2015-3209.
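
A minimal sketch of the kind of bounds enforcement described above, clamping
each copy to the space left in a fixed relay buffer (the 4096-byte size and
names are illustrative, not the actual pcnet code):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define XMIT_BUF_SIZE 4096u   /* illustrative relay buffer size */

    /* Sketch: when a packet spans multiple TMDs, never copy more than the
     * space remaining in the buffer, so the overall packet cannot overrun it. */
    static void queue_tx_fragment(uint8_t *buf, size_t *pos,
                                  const uint8_t *frag, size_t frag_len)
    {
        size_t space = XMIT_BUF_SIZE - *pos;
        if (frag_len > space) {
            frag_len = space;
        }
        memcpy(buf + *pos, frag, frag_len);
        *pos += frag_len;
    }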

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reported-by: Matt Tait <matttait@google.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[BR: BSC#932770]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-06-02 09:10:57 -06:00
Gonglei
2531d7ee4d pcnet: fix Negative array index read
s->xmit_pos may be assigned a negative value (-1),
but in this branch s->xmit_pos is used as an index into
the array s->buffer. Let's add a check for s->xmit_pos.
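
A hedged sketch of the check being added, treating the -1 value as "no frame
in progress" so it is never used as an array index (the struct and field names
are illustrative stand-ins, not the real PCNetState):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct tx_state {          /* illustrative stand-in for PCNetState */
        int xmit_pos;          /* -1 means no transmit in progress */
        uint8_t buffer[4096];
    };

    /* Sketch: refuse to hand out a buffer cursor while xmit_pos is negative. */
    static uint8_t *tx_cursor(struct tx_state *s)
    {
        if (s->xmit_pos < 0) {
            return NULL;
        }
        assert((size_t)s->xmit_pos < sizeof(s->buffer));
        return &s->buffer[s->xmit_pos];
    }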

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7b50d00911)
[BR: BSC#932770]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/net/pcnet.c
2015-06-02 09:10:52 -06:00
Petr Matousek
6855c034e7 fdc: force the fifo access to be in bounds of the allocated buffer
During processing of certain commands such as FD_CMD_READ_ID and
FD_CMD_DRIVE_SPECIFICATION_COMMAND the fifo memory access could
get out of bounds leading to memory corruption with values coming
from the guest.

Fix this by making sure that the index is always bounded by the
allocated memory.

This is CVE-2015-3456.
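
As an illustration of the idea, a sketch that keeps every FIFO index inside the
allocated buffer no matter what the command state computed (the size and helper
are illustrative, not the actual fdc patch):

    #include <stdint.h>

    #define FIFO_LEN 512u   /* illustrative sector-sized FIFO */

    /* Sketch: wrap the index so guest-driven values can never read or write
     * outside the allocated FIFO memory. */
    static uint8_t fifo_read(const uint8_t *fifo, uint32_t pos)
    {
        return fifo[pos % FIFO_LEN];
    }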

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
[AF: BSC#929339]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-05-12 20:27:37 +02:00
Daniel P. Berrange
51e5a6007e CVE-2015-1779: limit size of HTTP headers from websockets clients
The VNC server websockets decoder will read and buffer data from
websockets clients until it sees the end of the HTTP headers,
as indicated by \r\n\r\n. In theory this allows a malicious client to
trick QEMU into consuming an arbitrary amount of RAM. In practice,
because QEMU runs g_strstr_len() across the buffered header data,
it will spend increasingly long burning CPU time searching for
the substring match and less & less time reading data. So while
this does cause arbitrary memory growth, the bigger problem is
that QEMU will be burning 100% of available CPU time.

A novnc websockets client typically sends headers of around
512 bytes in length. As such it is reasonable to place a 4096
byte limit on the amount of data buffered while searching for
the end of HTTP headers.
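
A minimal sketch of such a limit, assuming a 4096-byte cap and glibc's memmem()
for the end-of-headers search (names and the helper are illustrative, not the
VNC server's code):

    #define _GNU_SOURCE            /* for memmem() */
    #include <stddef.h>
    #include <string.h>

    #define WS_HANDSHAKE_MAX 4096  /* illustrative cap on buffered header bytes */

    /* Sketch: returns 1 when the headers are complete, 0 to keep reading,
     * and -1 when the cap is hit without an end marker (drop the client). */
    static int ws_handshake_progress(const char *buf, size_t len)
    {
        if (memmem(buf, len, "\r\n\r\n", 4) != NULL) {
            return 1;
        }
        return len >= WS_HANDSHAKE_MAX ? -1 : 0;
    }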

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 2cdb5e142f)
[AF: BSC#924018]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-04-23 15:06:08 -06:00
Daniel P. Berrange
e3806f5d57 CVE-2015-1779: incrementally decode websocket frames
The logic for decoding websocket frames wants to fully
decode the frame header and payload, before allowing the
VNC server to see any of the payload data. There is no
size limit on websocket payloads, so this allows a
malicious network client to consume 2^64 bytes in memory
in QEMU. It can trigger this denial of service before
the VNC server even performs any authentication.

The fix is to decode the header, and then incrementally
decode the payload data as it is needed. With this fix
the websocket decoder will allow at most 4k of data to
be buffered before decoding and processing payload.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
[ kraxel: fix frequent spurious disconnects, suggested by Peter Maydell ]
[ kraxel: fix 32bit build ]
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit a2bebfd6e0)
[AF: BSC#924018]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-04-23 15:06:08 -06:00
Paolo Bonzini
b8c2ea9776 virtio-blk: do not relay a previous driver's WCE configuration to the current
The following sequence happens:
- the SeaBIOS virtio-blk driver does not support the WCE feature, which
causes QEMU to disable writeback caching

- the Linux virtio-blk driver resets the device, finds WCE is available
but writeback caching is disabled; tells block layer to not send cache
flush commands

- the Linux virtio-blk driver sets the DRIVER_OK bit, which causes
writeback caching to be re-enabled, but the Linux virtio-blk driver does
not know of this side effect and cache flushes remain disabled

The bug is at the third step.  If the guest does know about CONFIG_WCE,
QEMU should ignore the WCE feature's state.  The guest will control the
cache mode solely using configuration space.  This change makes Linux
do flushes correctly, but Linux will keep SeaBIOS's writethrough mode.

Hence, whenever the guest is reset, the cache mode of the disk should
be reset to whatever was specified in the "-drive" option.  With this
change, the Linux virtio-blk driver finds that writeback caching is
enabled, and tells the block layer to send cache flush commands
appropriately.

Reported-by: Rusty Russell <rusty@au1.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
[BR: Fixes bsc#920571]
(cherry picked from commit ef5bc96268)
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/virtio-blk.c
	hw/virtio-blk.h
2015-04-19 20:28:14 -06:00
Lin Ma
0398a1b258 migration: Fix incorrect state information for migrate_cancel
In qemu 1.4.x, when performing migrate_cancel during migration,
if migrate_fd_cancel in the main thread is scheduled to run before
the buffered_file_thread calls migrate_fd_put_buffer, s->state
is changed to MIG_STATE_CANCELLED by the main thread; migrate_fd_put_buffer
in the buffered_file_thread then returns -EIO because
s->state != MIG_STATE_ACTIVE, and this return value triggers
migrate_fd_error, which wrongly sets s->state = MIG_STATE_ERROR.

The patch fixes this issue.

Signed-off-by: Lin Ma <lma@suse.com>
[AF: BNC#843074]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:50 -07:00
Michael S. Tsirkin
4bf75ff6d6 cpu: verify that block->host is set
If it isn't, access at an offset will cause memory corruption.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit b78accf614)

Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:50 -07:00
Michael S. Tsirkin
0447550926 cpu: assert host pointer offset within block
Make accesses safer in case we missed some
check somewhere.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit fd5f3b6367)

Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:50 -07:00
Michael S. Tsirkin
b2bdec338d exec: add wrapper for host pointer access
host pointer accesses force pointer math, let's
add a wrapper to make them safer.
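
Taken together with the two checks above, a sketch of what such a wrapper can
look like: one place that does the pointer math and asserts both conditions
(the struct is an illustrative stand-in, not QEMU's RAMBlock):

    #include <assert.h>
    #include <stdint.h>

    typedef struct RAMBlockLike {   /* illustrative stand-in for RAMBlock */
        uint8_t *host;
        uint64_t length;
    } RAMBlockLike;

    /* Sketch: centralise host pointer math; assert the block is mapped and
     * the offset stays within it. */
    static inline void *ramblock_host_ptr(RAMBlockLike *block, uint64_t offset)
    {
        assert(block->host);
        assert(offset < block->length);
        return block->host + offset;
    }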

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 1240be2435)
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:50 -07:00
Michael S. Tsirkin
a586d7d202 migration: fix parameter validation on ram load
During migration, the values read from migration stream during ram load
are not validated. Especially offset in host_from_stream_offset() and
also the length of the writes in the callers of said function.

To fix this, we need to make sure that the [offset, offset + length]
range fits into one of the allocated memory regions.

Validating addr < len should be sufficient since data seems to always be
managed in TARGET_PAGE_SIZE chunks.

Fixes: CVE-2014-7840

Note: follow-up patches add extra checks on each block->host access.
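
A sketch of the kind of range check this implies, written so the addition
itself cannot overflow (names are illustrative, not the actual
host_from_stream_offset() change):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch: accept an (offset, length) pair from the stream only if it
     * fits entirely inside the selected memory block. */
    static bool range_in_block(uint64_t offset, uint64_t length,
                               uint64_t block_length)
    {
        return length <= block_length && offset <= block_length - length;
    }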

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 0be839a270)
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:50 -07:00
Gerd Hoffmann
85dadfc305 cirrus: don't overflow CirrusVGAState->cirrus_bltbuf
This is CVE-2014-8106.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit bf25983345)
[AF: BNC#907805]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:50 -07:00
Gerd Hoffmann
7b2dfb5d35 cirrus: fix blit region check
Issues:
 * Doesn't check the pitch correctly in case it is negative.
 * Doesn't check width at all.

Turn macro into functions while being at it, also factor out the check
for one region which we then can simply call twice for src + dst.

This is CVE-2014-8106.
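
A hedged sketch of a per-region check along these lines, handling a negative
pitch and taking the width into account; it would be called once for the source
and once for the destination region (parameters are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch: compute the lowest and highest byte touched by the blit and
     * require both to fall inside video memory. */
    static bool blit_region_ok(uint64_t vram_size, int64_t start,
                               int pitch, int height, int width)
    {
        int64_t first = start;
        int64_t last  = start + (int64_t)(height - 1) * pitch;
        if (pitch < 0) {               /* a negative pitch walks downwards */
            int64_t tmp = first;
            first = last;
            last = tmp;
        }
        last += width;                 /* the last line still spans 'width' bytes */
        return first >= 0 && last >= first && (uint64_t)last <= vram_size;
    }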

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d3532a0db0)
[AF: BNC#907805]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:50 -07:00
Peter Lieven
686fab80bd migration: do not overwrite zero pages
On incoming migration, do not memset pages to zero if they already read as zero.
Doing so would allocate a new zero page and consume memory unnecessarily; even
if we madvise MADV_DONTNEED later, the memory is only deallocated
asynchronously.
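
A minimal sketch of the destination-side behaviour described here (the zero
check is a plain byte loop for clarity; names are illustrative):

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    /* Sketch: a plain byte-wise zero check; the real code can use an
     * optimized helper instead. */
    static bool page_is_zero(const unsigned char *p, size_t len)
    {
        for (size_t i = 0; i < len; i++) {
            if (p[i] != 0) {
                return false;
            }
        }
        return true;
    }

    /* Sketch: only memset the destination page when it does not already
     * read as zero, so no new zero page gets faulted in. */
    static void load_zero_page(unsigned char *host, size_t page_size)
    {
        if (!page_is_zero(host, page_size)) {
            memset(host, 0, page_size);
        }
    }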

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 211ea74022)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:50 -07:00
Peter Lieven
abfdc2e6ff Revert "migration: do not sent zero pages in bulk stage"
Not sending zero pages breaks migration if a page is zero
at the source but not at the destination. This can e.g. happen
if different BIOS versions are used at source and destination.
It has also been reported that migration on pseries is completely
broken with this patch.

This effectively reverts commit f1c72795af.

Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:

	arch_init.c

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 9ef051e553)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:50 -07:00
Peter Lieven
d05ce95406 migration: use XBZRLE only after bulk stage
At the beginning of migration all pages are marked dirty and
in the first round a bulk migration of all pages is performed.

Currently all these pages are copied to the page cache regardless
of whether they are frequently updated or not. This doesn't make sense
since most of these pages are never transferred again.

This patch changes the XBZRLE transfer to only be used after
the bulk stage has been completed. That means a page is added
to the page cache the second time it is transferred, and XBZRLE
can benefit from the third transfer onwards.

Since the page cache is likely smaller than the number of pages,
it's also likely that in the second round the page is missing from the
cache due to collisions in the bulk phase.

On the other hand, a lot of unnecessary mallocs, memdups and frees
are saved.

The following results were taken earlier while executing
the test program from docs/xbzrle.txt, (+) with the patch and (-)
without (thanks to Eric Blake for reformatting and comments):

+ total time: 22185 milliseconds
- total time: 22410 milliseconds

Shaved 0.3 seconds, better than 1%!

+ downtime: 29 milliseconds
- downtime: 21 milliseconds

Not sure why downtime seemed worse, but probably not the end of the world.

+ transferred ram: 706034 kbytes
- transferred ram: 721318 kbytes

Fewer bytes sent - good.

+ remaining ram: 0 kbytes
- remaining ram: 0 kbytes
+ total ram: 1057216 kbytes
- total ram: 1057216 kbytes
+ duplicate: 108556 pages
- duplicate: 105553 pages
+ normal: 175146 pages
- normal: 179589 pages
+ normal bytes: 700584 kbytes
- normal bytes: 718356 kbytes

Fewer normal bytes...

+ cache size: 67108864 bytes
- cache size: 67108864 bytes
+ xbzrle transferred: 3127 kbytes
- xbzrle transferred: 630 kbytes

...and more compressed pages sent - good.

+ xbzrle pages: 117811 pages
- xbzrle pages: 21527 pages
+ xbzrle cache miss: 18750
- xbzrle cache miss: 179589

And very good improvement on the cache miss rate.

+ xbzrle overflow : 0
- xbzrle overflow : 0

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5cc11c46cf)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:50 -07:00
Peter Lieven
2c2261cfcc migration: do not search dirty pages in bulk stage
Avoid searching for dirty pages, just increment the
page offset. All pages are dirty anyway.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 70c8652bf3)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Peter Lieven
c6616fd654 migration: do not sent zero pages in bulk stage
During the bulk stage of ram migration, if a page is a
zero page, do not send it at all.
The memory at the destination reads as zero anyway.

Even if there is an madvise with QEMU_MADV_DONTNEED
at the target upon receipt of a zero page, I have observed
that the target starts swapping if the memory is overcommitted.
It seems that the pages are dropped asynchronously.

This patch also updates QMP to return the number of
skipped pages in MigrationStats.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit f1c72795af)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Peter Lieven
efe2752601 migration: add an indicator for bulk state of ram migration
The first round of ram transfer is special since all pages
are dirty and thus all memory pages are transferred to
the target. This patch adds a boolean variable to track
this stage.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 78d07ae7ac)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Peter Lieven
6e543d2c12 migration: search for zero instead of dup pages
Virtually all dup pages are zero pages. Remove
the special is_dup_page() function and use the
optimized buffer_find_nonzero_offset() function
instead.

Here buffer_find_nonzero_offset() is used directly
to avoid the unnecessary additional checks in
buffer_is_zero().

The raw performance gain checking 1 GByte of zeroed memory
over is_dup_page() is approx. 10-12% with SSE2
and 8-10% with unsigned long arithmetic.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3edcd7e6eb)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Peter Lieven
de04929dd2 cutils: add a function to find non-zero content in a buffer
This adds buffer_find_nonzero_offset(), an SSE2/Altivec
optimized function that searches for non-zero content in a
buffer.

The function starts full unrolling only after the first few chunks have
been checked one by one. Analyzing real memory page data has revealed
that non-zero pages are non-zero within the first 256-512 bits in
most cases. As this function is also heavily used to check for zero memory
pages, this tweak has been made to avoid the high setup costs of the fully
unrolled check for non-zero pages.

Due to the optimizations used in the function there are restrictions
on buffer address and search length. The function
can_use_buffer_find_nonzero_content() can be used to check if
the function can be used safely.
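
A plain-C sketch of the same idea, without the SSE2/Altivec parts: check the
first few machine words individually, then scan the rest in modestly unrolled
strides (it assumes an aligned buffer and a length that is a multiple of the
word size, mirroring the restrictions above):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: most non-zero pages differ early, so a cheap prefix check
     * avoids the setup cost of the unrolled loop in the common case. */
    static size_t find_nonzero_offset_sketch(const void *buf, size_t len)
    {
        const uint64_t *p = buf;
        size_t n = len / sizeof(uint64_t);
        size_t i;

        for (i = 0; i < 8 && i < n; i++) {          /* cheap prefix check */
            if (p[i]) {
                return i * sizeof(uint64_t);
            }
        }
        for (; i + 4 <= n; i += 4) {                /* unrolled main loop */
            if (p[i] | p[i + 1] | p[i + 2] | p[i + 3]) {
                break;
            }
        }
        for (; i < n; i++) {                        /* pin down the exact word */
            if (p[i]) {
                return i * sizeof(uint64_t);
            }
        }
        return len;                                 /* buffer is all zero */
    }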

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 41a259bd2b)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Peter Lieven
670d5d7fee move vector definitions to qemu-common.h
Vector optimizations will now be used in various places,
not just in is_dup_page() in arch_init.c.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit c61ca00ada)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Juan Quintela
94107cdfae migration: Improve QMP documentation
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 817c60457f)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Peter Crosthwaite
394084d080 iov: Factor out hexdumper
Factor out the hexdumper functionality from iov for all to use. Useful for
creating verbose debug printfery that dumps packet data.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: faaac219c55ea586d3f748befaf5a2788fd271b8.1361853677.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 6ff66f50f0)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:49 -07:00
Tony Breeds
551fc996b5 block/raw-posix: use seek_hole ahead of fiemap
try_fiemap() uses FIEMAP_FLAG_SYNC which has a significant performance
impact.

Prefer seek_hole() over fiemap() to avoid this impact where possible.
seek_hole is more widely used and, arguably, has potential to be
optimised in the kernel.
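
A sketch of what a seek_hole-based probe can look like, using lseek() with
SEEK_HOLE/SEEK_DATA instead of the FIEMAP ioctl (the helper and its return
convention are illustrative, not the actual raw-posix change):

    #define _GNU_SOURCE          /* for SEEK_HOLE / SEEK_DATA on Linux */
    #include <errno.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Sketch: classify the file range starting at 'start' as data or hole.
     * Returns 1 for data, 0 for a hole, or -errno on failure, and stores
     * where the current region ends in *next. */
    static int range_is_data(int fd, off_t start, off_t *next)
    {
        off_t hole = lseek(fd, start, SEEK_HOLE);
        if (hole < 0) {
            return -errno;
        }
        if (hole > start) {
            *next = hole;                    /* data runs up to the next hole */
            return 1;
        }
        off_t data = lseek(fd, start, SEEK_DATA);
        if (data < 0) {
            *next = lseek(fd, 0, SEEK_END);  /* rest of the file is a hole */
            return 0;
        }
        *next = data;                        /* hole runs up to the next data */
        return 0;
    }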

Reported-By: Michael Steffens <michael_steffens@posteo.de>
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: Pádraig Brady <pbrady@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7c15903789)
[BR: BNC#908381]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	block/raw-posix.c
2015-02-05 08:18:48 -07:00
Tony Breeds
ee23dcb6c1 block/raw-posix: Fix disk corruption in try_fiemap
Using fiemap without FIEMAP_FLAG_SYNC is a known corrupter.

Add the FIEMAP_FLAG_SYNC flag to the FS_IOC_FIEMAP ioctl.  This has
the downside of significantly reducing performance.

Reported-By: Michael Steffens <michael_steffens@posteo.de>
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: Pádraig Brady <pbrady@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 38c4d0aea3)
[BR: BNC#908381]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:48 -07:00
Max Reitz
b42aedacc9 block/raw-posix: Try both FIEMAP and SEEK_HOLE
The current version of raw-posix always uses ioctl(FS_IOC_FIEMAP) if
FIEMAP is available; lseek with SEEK_HOLE/SEEK_DATA are not even
compiled in in this case. However, there may be implementations which
support the latter but not the former (e.g., NFSv4.2) as well as vice
versa.

To cover both cases, try FIEMAP first (as this will return -ENOTSUP if
not supported instead of returning a failsafe value (everything
allocated as a single extent)) and if that does not work, fall back to
SEEK_HOLE/SEEK_DATA.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 4f11aa8a40)
[BR: BNC#908381]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	block/raw-posix.c
2015-02-05 08:18:48 -07:00
Hannes Reinecke
215b18c9fc qdev: Validate hex properties
strtoul(l) might overflow, in which case it'll return '-1' and set
the appropriate error code. So update the calls to strtoul(l) when
parsing hex properties to avoid silent overflows.
And we should be using an intermediate variable to avoid clobbering
the passed-in pointer on error.
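
A minimal sketch of this parsing pattern, with errno checked for overflow and
the result stored only on success (the helper name and strictness about
trailing characters are illustrative):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Sketch: parse a hex property into an intermediate variable and only
     * write it back on success, so the caller's value is never clobbered. */
    static int parse_hex64(const char *str, uint64_t *out)
    {
        char *end;
        errno = 0;
        unsigned long long val = strtoull(str, &end, 16);
        if (errno == ERANGE || end == str || *end != '\0') {
            return -1;               /* overflow or not a valid hex number */
        }
        *out = val;
        return 0;
    }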

Signed-off-by: Hannes Reinecke <hare@suse.de>

Backported the patch from:
http://lists.nongnu.org/archive/html/qemu-devel/2013-11/msg03950.html
[LM: BNC#852397]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:48 -07:00
Amos Kong
402d0d1f0c add a boot option to do strict boot
SeaBIOS already added a new device type to halt booting.
QEMU can add "HALT" at the end of the bootindex string; SeaBIOS
will then halt booting after trying to boot from all
selected devices.

This patch adds a new boot option to configure whether to boot
from unselected devices.

This option only takes effect when the boot priority is changed by
bootindex options; the old style (-boot order=..) will still
try to boot from unselected devices.

v2: add HALT entry in get_boot_devices_list()
v3: rebase to latest qemu upstream

Signed-off-by: Amos Kong <akong@redhat.com>
Message-id: 1363674207-31496-1-git-send-email-akong@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit c8a6ae8bb9)
Signed-off-by: Lin Ma <lma@suse.com>
[BR: BNC#900084]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2015-02-05 08:18:48 -07:00
Amos Kong
58b487a2bc monitor: introduce query-command-line-options
Libvirt has no way to probe whether an option or property is supported.
This patch introduces a new QMP command to query command line
option information. An HMP command isn't added because it's not needed.

Signed-off-by: Amos Kong <akong@redhat.com>
CC: Luiz Capitulino <lcapitulino@redhat.com>
CC: Osier Yang <jyang@redhat.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 1f8f987d34)
[BR: BNC#899144]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	qapi-schema.json
2015-02-05 08:18:48 -07:00
Petr Matousek
6d9a092479 slirp: udp: fix NULL pointer dereference because of uninitialized socket
When the guest sends a udp packet with source port and source addr 0,
an uninitialized socket is picked up when looking for matching, already
created udp sockets, and is later passed to sosendto(), where a NULL pointer
dereference is hit during the so->slirp->vnetwork_mask.s_addr access.

Fix this by checking that the socket is not just a socket stub.

This is CVE-2014-3640.

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reported-by: Xavier Mehrenberger <xavier.mehrenberger@airbus.com>
Reported-by: Stephane Duverger <stephane.duverger@eads.net>
Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Message-id: 20140918063537.GX9321@dhcp-25-225.brq.redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 01f7cecf00)
[AF: BNC#897654]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:48 -07:00
Michael S. Tsirkin
e87058cc54 usb: fix up post load checks
Correct post load checks:
1. dev->setup_len == sizeof(dev->data_buf)
    seems fine, no need to fail migration
2. When state is DATA, passing index > len
   will cause memcpy with negative length,
   resulting in heap overflow

First of the issues was reported by dgilbert.

Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 719ffe1f5f)
[AF: BNC#878541]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:48 -07:00
Kevin Wolf
3b75987950 qcow1: Validate image size (CVE-2014-0223)
A huge image size could cause s->l1_size to overflow. Make sure that
images never require a L1 table larger than what fits in s->l1_size.

This cannot only cause unbounded allocations, but also the allocation of
a too small L1 table, resulting in out-of-bounds array accesses (both
reads and writes).
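
A hedged sketch of such a validation: derive the number of L1 entries from the
image size without overflowing and reject anything an int-sized l1_size could
not represent (the shift parameter and helper are illustrative, not the actual
qcow1 patch):

    #include <limits.h>
    #include <stdint.h>

    /* Sketch: 'shift' is an illustrative log2 of the bytes covered per L1
     * entry; refuse sizes whose entry count would overflow l1_size or the
     * table allocation. */
    static int check_l1_size(uint64_t total_size, unsigned shift)
    {
        if (shift == 0 || shift >= 63) {
            return -1;
        }
        uint64_t entries = (total_size >> shift) +
                           ((total_size & ((1ULL << shift) - 1)) != 0);
        if (entries > INT_MAX / sizeof(uint64_t)) {
            return -1;               /* l1_size / allocation would overflow */
        }
        return 0;
    }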

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 46485de0cb)
[AF: BNC#877645; error_setg() -> qerror_report()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Kevin Wolf
37f7413027 qcow1: Validate L2 table size (CVE-2014-0222)
Too large L2 table sizes cause unbounded allocations. Images actually
created by qemu-img only have 512 byte or 4k L2 tables.

To keep things consistent with cluster sizes, allow ranges between 512
bytes and 64k (in fact, down to 1 entry = 8 bytes is technically
working, but L2 table sizes smaller than a cluster don't make a lot of
sense).

This also means that the number of bytes on the virtual disk that are
described by the same L2 table is limited to at most 8k * 64k or 2^29,
preventively avoiding any integer overflows.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
(cherry picked from commit 42eb58179b)
[AF: BNC#877642; error_setg() -> qerror_report()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Kevin Wolf
44a0871d54 qcow1: Check maximum cluster size
Huge values for header.cluster_bits cause unbounded allocations (e.g.
for s->cluster_cache) and crash qemu this way. Less huge values may
survive those allocations, but can cause integer overflows later on.

The only cluster sizes that qemu can create are 4k (for standalone
images) and 512 (for images with backing files), so we can limit it
to 64k.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
(cherry picked from commit 7159a45b2b)
[AF: error_setg() -> qerror_report(), disabled iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Alexander Graf
cb74d9ee43 KVM: Extend dynamic MSI route flush
We have a limited number of IRQ routing entries. To ensure that we don't
exceed that number, we flush dynamically created MSI route entries when
we realize that we're running out of space.

However, we track the GSI count incorrectly. This patch adds a safety net
to make sure we're flushing all dynamic MSI routes when we realize that we
would be exceeding the number space.

Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:47 -07:00
Alexander Graf
59cdffd5f1 x86 XSAVE: Reconstruct xsave cpuid leafs
Different virtual CPUs implement different capabilities of features that
get reflected in XSAVE depth. So instead of passing in the maximum XSAVE
capabilities, we should only tell the guest as much as it has to see for
the respective chosen vcpu type.

This patch is heavily based on commit 2560f19f but adjusted to apply on
our ancient code base.

Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:47 -07:00
Alexander Graf
18f2150f92 x86 PMU: Disable vPMU cpuid exposure
On x86 we expose all KVM PMU capabilities back into KVM. Unfortunately our
vPMU emulation breaks Windows Server 2012 R2.

Because we're lacking support for all the vPMU MSRs anyway, disable all PMU
CPUID flags as well, so that Windows is happy and we don't get a half-way
implemented PMU exposed into our guest.

Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:47 -07:00
Alexander Graf
0625626e51 KVM: Fix GSI number space limit
KVM tells us the number of GSIs it can handle inside the kernel. That value is
basically KVM_MAX_IRQ_ROUTES. However when we try to set the GSI mapping table,
it checks for

    r = -EINVAL;
    if (routing.nr >= KVM_MAX_IRQ_ROUTES)
        goto out;

erroring out even when we're only using all of the GSIs. To make sure we never
hit that limit, let's reduce the number of GSIs we get from KVM by one.

Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:47 -07:00
Michael S. Tsirkin
1a66a3ca79 virtio: validate config_len on load
Malformed input can have config_len in the migration stream
exceed the array size allocated on the destination; the
result will be heap overflow.

To fix, check that config_len matches on both sides.

CVE-2014-0182

Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>

--

v2: use %ix and %zx to print config_len values
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit a890a2f913)
[AF: BNC#874788]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Michael S. Tsirkin
158326e199 virtio-net: out-of-bounds buffer write on load
CVE-2013-4149 QEMU 1.3.0 out-of-bounds buffer write in
virtio_net_load()@hw/net/virtio-net.c

>         } else if (n->mac_table.in_use) {
>             uint8_t *buf = g_malloc0(n->mac_table.in_use);

We are allocating buffer of size n->mac_table.in_use

>             qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);

and then read n->mac_table.in_use * ETH_ALEN bytes into that
n->mac_table.in_use-byte buffer, corrupting memory.

If adversary controls state then memory written there is controlled
by adversary.

Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 98f93ddd84)
[AF: BNC#864649]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Michael Roth
fd67499caa openpic: avoid buffer overrun on incoming migration
CVE-2013-4534

opp->nb_cpus is read from the wire and used to determine how many
IRQDest elements to read into opp->dst[]. If the value exceeds the
length of opp->dst[], MAX_CPU, opp->dst[] can be overrun with arbitrary
data from the wire.

Fix this by failing migration if the value read from the wire exceeds
MAX_CPU.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 73d963c0a7)
[AF: BNC#864811; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Michael S. Tsirkin
71149b3e14 ssi-sd: fix buffer overrun on invalid state load
CVE-2013-4537

s->arglen is taken from wire and used as idx
in ssi_sd_transfer().

Validate it before access.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit a9c380db3b)
[AF: BNC#864391]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:47 -07:00
Peter Maydell
5f5aa07d16 savevm: Ignore minimum_version_id_old if there is no load_state_old
At the moment we require vmstate definitions to set minimum_version_id_old
to the same value as minimum_version_id if they do not provide a
load_state_old handler. Since the load_state_old functionality is
required only for a handful of devices that need to retain migration
compatibility with a pre-vmstate implementation, this means the bulk
of devices have pointless boilerplate. Relax the definition so that
minimum_version_id_old is ignored if there is no load_state_old handler.

Note that under the old scheme we would segfault if the vmstate
specified a minimum_version_id_old that was less than minimum_version_id
but did not provide a load_state_old function, and the incoming state
specified a version number between minimum_version_id_old and
minimum_version_id. Under the new scheme this will just result in
our failing the migration.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 767adce2d9)
[AF: Backported from vmstate.c]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:46 -07:00
Michael S. Tsirkin
3a34ab453f usb: sanity check setup_index+setup_len in post_load
CVE-2013-4541

s->setup_len and s->setup_index are fed into usb_packet_copy as
size/offset into s->data_buf; it's possible for invalid state to exploit
this to load arbitrary data.

setup_len and setup_index should be checked to make sure
they are not negative.
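
A sketch of a post_load validation along these lines, rejecting negative
values and any pair that reaches past the buffer (the size and helper are
illustrative, not the actual usb change):

    #include <stdint.h>

    #define DATA_BUF_SIZE 4096   /* illustrative size of dev->data_buf[] */

    /* Sketch: reject negative values and any (index, length) pair that would
     * reach past the end of data_buf[] before they are used for copies. */
    static int setup_state_valid(int32_t setup_len, int32_t setup_index)
    {
        if (setup_len < 0 || setup_index < 0) {
            return 0;
        }
        if (setup_len > DATA_BUF_SIZE ||
            setup_index > DATA_BUF_SIZE - setup_len) {
            return 0;
        }
        return 1;
    }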

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 9f8e9895c5)
[AF: BNC#864802]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:46 -07:00
Michael S. Tsirkin
80ce3a7403 vmstate: s/VMSTATE_INT32_LE/VMSTATE_INT32_POSITIVE_LE/
As the macro verifies the value is positive, rename it
to make the function clearer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3476436a44)
[AF: backported (target-arm doesn't use VMState yet)]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:46 -07:00
Michael S. Tsirkin
7af0df9343 virtio-scsi: fix buffer overrun on invalid state load
CVE-2013-4542

hw/scsi/scsi-bus.c invokes load_request.

 virtio_scsi_load_request does:
    qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));

this probably can make elem invalid, for example,
make in_num or out_num huge, then:

    virtio_scsi_parse_req(s, vs->cmd_vqs[n], req);

will do:

    if (req->elem.out_num > 1) {
        qemu_sgl_init_external(req, &req->elem.out_sg[1],
                               &req->elem.out_addr[1],
                               req->elem.out_num - 1);
    } else {
        qemu_sgl_init_external(req, &req->elem.in_sg[1],
                               &req->elem.in_addr[1],
                               req->elem.in_num - 1);
    }

and this will access out of array bounds.

Note: this adds security checks within assert calls since
SCSIBusInfo's load_request cannot fail.
For now simply disable builds with NDEBUG - there seems
to be little value in supporting these.

Cc: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3c3ce98142)
[AF: BNC#864804, backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:46 -07:00
Michael S. Tsirkin
094e9d9a91 zaurus: fix buffer overrun on invalid state load
CVE-2013-4540

Within scoop_gpio_handler_update, if prev_level has a high bit set, then
we get bit > 16 and that causes a buffer overrun.

Since prev_level comes from wire indirectly, this can
happen on invalid state load.

Similarly for gpio_level and gpio_dir.

To fix, limit to 16 bit.

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 52f91c3723)
[AF: BNC#864801]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
a819068104 tsc210x: fix buffer overrun on invalid state load
CVE-2013-4539

s->precision, nextprecision, function and nextfunction
come from wire and are used
as idx into resolution[] in TSC_CUT_RESOLUTION.

Validate after load to avoid buffer overrun.

Cc: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5193be3be3)
[AF: BNC#864805]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
2ca9b4d153 ssd0323: fix buffer overun on invalid state load
CVE-2013-4538

s->cmd_len is used as an index in ssd0323_transfer() to store a 32-bit field.
It is possible this field might then be supplied by the guest to overwrite a
return addr somewhere. Same for the row/col fields, which are indices into
framebuffer array.

To fix validate after load.

Additionally, validate that the row/col_start/end are within bounds;
otherwise the guest can provoke an overrun by either setting the _end
field so large that the row++ increments just walk off the end of the
array, or by setting the _start value to something bogus and then
letting the "we hit end of row" logic reset row to row_start.

For completeness, validate mode as well.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit ead7a57df3)
[AF: BNC#864769]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
ff80ec1aab pxa2xx: avoid buffer overrun on incoming migration
CVE-2013-4533

s->rx_level is read from the wire and used to determine how many bytes
to subsequently read into s->rx_fifo[]. If s->rx_level exceeds the
length of s->rx_fifo[] the buffer can be overrun with arbitrary data
from the wire.

Fix this by validating rx_level against the size of s->rx_fifo.

Cc: Don Koch <dkoch@verizon.com>
Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Don Koch <dkoch@verizon.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit caa881abe0)
[AF: BNC#864655]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
4bf7b7da45 virtio: validate num_sg when mapping
CVE-2013-4535
CVE-2013-4536

In both virtio-block and virtio-serial,
VirtQueueElements are read in as buffers and passed to
virtqueue_map_sg(), where num_sg is taken from the wire and can force
writes to indices beyond VIRTQUEUE_MAX_SIZE.

To fix, validate num_sg.

Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 36cf2a3713)
[AF: BNC#864665]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael Roth
5a6e91a399 virtio: avoid buffer overrun on incoming migration
CVE-2013-6399

vdev->queue_sel is read from the wire, and later used in the
emulation code as an index into vdev->vq[]. If the value of
vdev->queue_sel exceeds the length of vdev->vq[], currently
allocated to be VIRTIO_PCI_QUEUE_MAX elements, subsequent PIO
operations such as VIRTIO_PCI_QUEUE_PFN can be used to overrun
the buffer with arbitrary data originating from the source.

Fix this by failing migration if the value from the wire exceeds
VIRTIO_PCI_QUEUE_MAX.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 4b53c2c72c)
[AF: BNC#864814]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
e8363b7738 vmstate: fix buffer overflow in target-arm/machine.c
CVE-2013-4531

cpreg_vmstate_indexes is a VARRAY_INT32. A negative value for
cpreg_vmstate_array_len will cause a buffer overflow.

VMSTATE_INT32_LE was supposed to protect against this
but doesn't because it doesn't validate that input is
non-negative.

Fix this macro to validate the value appropriately.

The only other user of VMSTATE_INT32_LE doesn't
ever use negative numbers so it doesn't care.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit d2ef4b61fe)
[AF: BNC#864796, backported from vmstate.c]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Dr. David Alan Gilbert
dd9169bc43 Fix vmstate_info_int32_le comparison/assign
Fix comparison of vmstate_info_int32_le so that it succeeds if loaded
value is (l)ess than or (e)qual

When the comparison succeeds, assign the loaded value.
  This is a change in behaviour, but I think it was the original intent, since
  the idea is to check if the version/size of the thing you're loading is
  less than some limit, but you might well want to do something based on
  the actual version/size in the file.

Fix up comment and name text

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 24a370ef23)
[AF: Backported from vmstate.c]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
877b642be0 pl022: fix buffer overun on invalid state load
CVE-2013-4530

pl022.c did not bounds check tx_fifo_head and
rx_fifo_head after loading them from file and
before they are used to dereference array.

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit d8d0a0bc7e)
[AF: BNC#864682]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
c181a409d4 hw/pci/pcie_aer.c: fix buffer overruns on invalid state load
4) CVE-2013-4529
hw/pci/pcie_aer.c    pcie aer log can overrun the buffer if log_num is
                     too large

There are two issues in this file:
1. log_max from the remote can be larger than on the local side;
then the buffer will overrun with data coming from the state file.
2. log_num can be larger; then we get data corruption,
again with an overflow, but not adversary controlled.

Fix both issues.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5f691ff91d)
[AF: BNC#864678]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
5626edc3f9 hpet: fix buffer overrun on invalid state load
CVE-2013-4527 hw/timer/hpet.c buffer overrun

hpet is a VARRAY with a uint8 size but static array of 32

To fix, make sure num_timers is valid using VMSTATE_VALID hook.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3f1c49e213)
[AF: BNC#864673]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:45 -07:00
Michael S. Tsirkin
2a57bae0d1 vmstate: add VMSTATE_VALIDATE
Validate state using VMS_ARRAY with num = 0 and VMS_MUST_EXIST

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 4082f0889b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Michael S. Tsirkin
cca58015c0 vmstate: add VMS_MUST_EXIST
Can be used to verify a required field exists or validate
state in some other way.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5bf81c8d63)
[AF: Backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Michael S. Tsirkin
a958839822 ahci: fix buffer overrun on invalid state load
CVE-2013-4526

Within hw/ide/ahci.c, VARRAY refers to ports which is also loaded.  So
we use the old version of ports to read the array but then allow any
value for ports.  This can cause the code to overflow.

There's no reason to migrate ports - it never changes.
So just make sure it matches.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit ae2158ad6c)
[AF: BNC#864671]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Michael S. Tsirkin
e8a8f9f1c4 virtio: out-of-bounds buffer write on invalid state load
CVE-2013-4151 QEMU 1.0 out-of-bounds buffer write in
virtio_load@hw/virtio/virtio.c

So we have this code since way back when:

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);

array of vqs has size VIRTIO_PCI_QUEUE_MAX, so
on invalid input this will write beyond end of buffer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit cc45995294)
[AF: BNC#864653]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Michael S. Tsirkin
0f4f9527d0 virtio-net: out-of-bounds buffer write on invalid state load
CVE-2013-4150 QEMU 1.5.0 out-of-bounds buffer write in
virtio_net_load()@hw/net/virtio-net.c

This code is in hw/net/virtio-net.c:

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

Number of vqs is max_queues, so if we get invalid input here,
for example if max_queues = 2, curr_queues = 3, we get
write beyond end of the buffer, with data that comes from
wire.

This might be used to corrupt qemu memory in hard to predict ways.
Since we have lots of function pointers around, RCE might be possible.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit eea750a562)
[AF: BNC#864650]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Michael S. Tsirkin
408dc94b92 virtio-net: fix buffer overflow on invalid state load
CVE-2013-4148 QEMU 1.0 integer conversion in
virtio_net_load()@hw/net/virtio-net.c

Deals with loading a corrupted savevm image.

>         n->mac_table.in_use = qemu_get_be32(f);

in_use is int so it can get negative when assigned 32bit unsigned value.

>         /* MAC_TABLE_ENTRIES may be different from the saved image */
>         if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {

passing this check ^^^

>             qemu_get_buffer(f, n->mac_table.macs,
>                             n->mac_table.in_use * ETH_ALEN);

with good in_use value, "n->mac_table.in_use * ETH_ALEN" can get
positive and bigger than mac_table.macs. For example 0x81000000
satisfies this condition when ETH_ALEN is 6.

Fix it by making the value unsigned.
For consistency, change first_multi as well.

Note: all call sites were audited to confirm that
making them unsigned didn't cause any issues:
it turns out we actually never do math on them,
so it's easy to validate because both values are
always <= MAC_TABLE_ENTRIES.

Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 71f7fe48e1)
[AF: BNC#864812; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Michael S. Tsirkin
d9593d1734 virtio-net: fix guest-triggerable buffer overrun
When a VM guest programs multicast addresses for
a virtio net card, it supplies a 32-bit
entries counter for the number of addresses.
These addresses are read into the tail portion of
a fixed macs array which has size MAC_TABLE_ENTRIES,
at an offset equal to in_use.

To avoid overflow of this array by guest, qemu attempts
to test the size as follows:
-    if (in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {

however, as mac_data.entries is uint32_t, this sum
can overflow, e.g. if in_use is 1 and mac_data.entries
is 0xffffffff then in_use + mac_data.entries will be 0.

Qemu will then read guest supplied buffer into this
memory, overflowing buffer on heap.

CVE-2014-0150
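
A minimal sketch of the overflow-safe form of this check (the table size and
helper name are illustrative, not the actual virtio-net code):

    #include <stdint.h>

    #define MAC_TABLE_ENTRIES_SKETCH 64   /* illustrative table size */

    /* Sketch: compare against the remaining space instead of summing the two
     * values, so a huge guest-supplied entry count cannot wrap the 32-bit
     * addition around and slip past the check. */
    static int mac_entries_fit(uint32_t in_use, uint32_t entries)
    {
        return in_use <= MAC_TABLE_ENTRIES_SKETCH &&
               entries <= MAC_TABLE_ENTRIES_SKETCH - in_use;
    }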

Cc: qemu-stable@nongnu.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1397218574-25058-1-git-send-email-mst@redhat.com
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit edc2438512)
[AF: Resolves BNC#873235; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Benoît Canet
301feb072e ide: Correct improper smart self test counter reset in ide core.
The SMART self test counter was incorrectly being reset to zero,
not 1. This had the effect that on every 21st SMART EXECUTE OFFLINE:
 * We would write off the beginning of a dynamically allocated buffer
 * We forgot the SMART history
Fix this.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Message-id: 1397336390-24664-1-git-send-email-benoit.canet@irqsave.net
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Cc: qemu-stable@nongnu.org
Acked-by: Kevin Wolf <kwolf@redhat.com>
[PMM: tweaked commit message as per suggestions from Markus]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

(cherry picked from commit 940973ae0b)
[AF: Addresses CVE-2014-2894; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Jason Wang
9848148c9c virtio: properly validate address before accessing config
There are several issues in the current checking:

- The check was based on subtracting unsigned values, which can overflow
- It was done after .{set|get}_config(), which can lead to a crash when config_len
  is zero since vdev->config is NULL

Fix this by:

- Validating the address in virtio_pci_config_{read|write}() before
  .{set|get}_config
- Using addition instead of subtraction to do the validation

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Petr Matousek <pmatouse@redhat.com>
Message-id: 1367905369-10765-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 5f5a131865)
[AF: BNC#817593, CVE-2013-2016; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Kevin Wolf
924eda5c4a parallels: Sanity check for s->tracks (CVE-2014-0142)
This avoids a possible division by zero.

Convert s->tracks to unsigned as well because it feels better than
surviving just because the results of calculations with s->tracks are
converted to unsigned anyway.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 9302e863aa)
[AF: BNC#870439; error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:44 -07:00
Kevin Wolf
8c9ef11d8a parallels: Fix catalog size integer overflow (CVE-2014-0143)
The first test case would cause a huge memory allocation, leading to a
qemu abort; the second one to a too small malloc() for the catalog
(smaller than s->catalog_size), which causes a read-only out-of-bounds
array access and, on big endian hosts, an endianness conversion for an
undefined memory area.

The sample image used here is not an original Parallels image. It was
created using an hexeditor on the basis of the struct that qemu uses.
Good enough for trying to crash the driver, but not for ensuring
compatibility.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit afbcc40bee)
[AF: BNC#870439; error_setg() -> qerror_report(), dropped iotests]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Kevin Wolf
d5685a80ce qcow2: Limit snapshot table size
Even with a limit of 64k snapshots, each snapshot could have a filename
and an ID with up to 64k, which would still lead to pretty large
allocations, which could potentially lead to qemu aborting. Limit the
total size of the snapshot table to an average of 1k per entry when
the limit of 64k snapshots is fully used. This should be plenty for any
reasonable user.

This also fixes potential integer overflows of s->snapshot_size.

Suggested-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5dae6e30c5)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Kevin Wolf
5e783ed780 qcow2: Check maximum L1 size in qcow2_snapshot_load_tmp() (CVE-2014-0143)
This avoids an unbounded allocation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6a83f8b5be)
[AF: BNC#870439; rebased on report_unsupported(),
     error_setg() -> error_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Kevin Wolf
b27b5c305e qcow2: Fix L1 allocation size in qcow2_snapshot_load_tmp() (CVE-2014-0145)
For the L1 table to be loaded for an internal snapshot, the code allocated
only enough memory to hold the currently active L1 table. If the
snapshot's L1 table is actually larger than the current one, this leads
to a buffer overflow.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c05e4667be)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Kevin Wolf
87559bfe5a qcow2: Fix NULL dereference in qcow2_open() error path (CVE-2014-0146)
The qcow2 code assumes that s->snapshots is non-NULL if s->nb_snapshots
!= 0. By having the initialisation of both fields separated in
qcow2_open(), any error occurring in between would cause the error path
to dereference NULL in qcow2_free_snapshots() if the image had any
snapshots.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 11b128f406)
[AF: BNC#870439; dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Kevin Wolf
211bbf522c qcow2: Fix copy_sectors() with VM state
bs->total_sectors is not the highest possible sector number that could
be involved in a copy on write operation: VM state is after the end of
the virtual disk. This resulted in wrong values for the number of
sectors to be copied (n).

The code that checks for the end of the image isn't required any more
because the code hasn't been calling the block layer's bdrv_read() for a
long time; instead, it directly calls qcow2_readv(), which doesn't error
out on VM state sector numbers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6b7d4c5558)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Kevin Wolf
7bae3c9587 block: Limit request size (CVE-2014-0143)
Limiting the size of a single request to INT_MAX not only fixes a
direct integer overflow in bdrv_check_request() (which would only
trigger bad behaviour with ridiculously huge images, as in close to
2^64 bytes), but can also prevent overflows in all block drivers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 8f4754ede5)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
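A minimal sketch of such a cap, assuming a hypothetical helper rather than the real bdrv_check_request() signature:

    #include <stdint.h>
    #include <limits.h>
    #include <errno.h>

    /* Reject any single request larger than INT_MAX bytes so that later
     * int arithmetic in the block drivers cannot overflow. */
    static int check_request_bytes(uint64_t bytes)
    {
        return bytes > INT_MAX ? -EIO : 0;
    }
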
Stefan Hajnoczi
949fab98f8 dmg: prevent chunk buffer overflow (CVE-2014-0145)
Both compressed and uncompressed I/O is buffered.  dmg_open() calculates
the maximum buffer size needed from the metadata in the image file.

There is currently a buffer overflow since ->lengths[] is accounted
against the maximum compressed buffer size but actually uses the
uncompressed buffer:

  switch (s->types[chunk]) {
  case 1: /* copy */
      ret = bdrv_pread(bs->file, s->offsets[chunk],
                       s->uncompressed_chunk, s->lengths[chunk]);

We must account against the maximum uncompressed buffer size for type=1
chunks.

This patch fixes the maximum buffer size calculation to take into
account the chunk type.  It is critical that we update the correct
maximum since there are two buffers ->compressed_chunk and
->uncompressed_chunk.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f0dce23475)
[AF: BNC#870439; rebased]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
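The idea of accounting against the correct maximum per chunk type can be sketched like this (illustrative names only, not the dmg driver's actual fields):

    #include <stdint.h>

    /* Track the per-type maxima separately; type 1 ("copy") chunks use the
     * uncompressed buffer, everything else uses the compressed one. */
    static void account_chunk(uint32_t type, uint64_t len,
                              uint64_t *max_compressed, uint64_t *max_uncompressed)
    {
        if (type == 1) {
            if (len > *max_uncompressed) {
                *max_uncompressed = len;
            }
        } else {
            if (len > *max_compressed) {
                *max_compressed = len;
            }
        }
    }
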
Stefan Hajnoczi
ab00d35ba6 dmg: use uint64_t consistently for sectors and lengths
The DMG metadata is stored as uint64_t, so use the same type for
sector_num.  int was a particularly poor choice since it is only 32-bit
and would truncate large values.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 686d7148ec)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Stefan Hajnoczi
ccb16b84cd dmg: sanitize chunk length and sectorcount (CVE-2014-0145)
Chunk length and sectorcount are used for decompression buffers as well
as the bdrv_pread() count argument.  Ensure that they have reasonable
values so neither memory allocation nor conversion from uint64_t to int
will cause problems.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c165f77580)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Stefan Hajnoczi
7acfa7e9eb dmg: use appropriate types when reading chunks
Use the right types instead of signed int:

  size_t new_size;

  This is a byte count for g_realloc() that is calculated from uint32_t
  and size_t values.

  uint32_t chunk_count;

  Use the same type as s->n_chunks, which is used together with
  chunk_count.

This patch is a cleanup and does not fix bugs.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit eb71803b04)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Stefan Hajnoczi
0f55cd19aa dmg: drop broken bdrv_pread() loop
It is not necessary to check errno for EINTR and the block layer does
not produce short reads.  Therefore we can drop the loop that attempts
to read a compressed chunk.

The loop is buggy because it incorrectly adds the transferred bytes
twice:

  do {
      ret = bdrv_pread(...);
      i += ret;
  } while (ret >= 0 && ret + i < s->lengths[chunk]);

Luckily we can drop the loop completely and perform a single
bdrv_pread().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b404bf8542)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:43 -07:00
Stefan Hajnoczi
05fc570638 dmg: prevent out-of-bounds array access on terminator
When a terminator is reached the base for offsets and sectors is stored.
The following records that are processed will use this base value.

If the first record we encounter is a terminator, then calculating the
base values would result in out-of-bounds array accesses.  Don't do
that.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 73ed27ec28)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Stefan Hajnoczi
a9041a3d9c dmg: coding style and indentation cleanup
Clean up the mix of tabs and spaces, as well as the coding style
violations in block/dmg.c.  There are no semantic changes since this
patch simply reformats the code.

This patch is necessary before we can make meaningful changes to this
file, due to the inconsistent formatting and confusing indentation.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2c1885adcf)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Kevin Wolf
9639415b85 qcow2: Fix new L1 table size check (CVE-2014-0143)
The size in bytes is assigned to an int later, so check that instead of
the number of entries.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit cab60de930)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Kevin Wolf
7028f2bc09 qcow2: Catch some L1 table index overflows
This catches the situation that is described in the bug report at
https://bugs.launchpad.net/qemu/+bug/865518 and goes like this:

    $ qemu-img create -f qcow2 huge.qcow2 $((1024*1024))T
    Formatting 'huge.qcow2', fmt=qcow2 size=1152921504606846976 encryption=off cluster_size=65536 lazy_refcounts=off
    $ qemu-io /tmp/huge.qcow2 -c "write $((1024*1024*1024*1024*1024*1024 - 1024)) 512"
    Segmentation fault

With this patch applied the segfault will be avoided, however the case
will still fail, though gracefully:

    $ qemu-img create -f qcow2 /tmp/huge.qcow2 $((1024*1024))T
    Formatting 'huge.qcow2', fmt=qcow2 size=1152921504606846976 encryption=off cluster_size=65536 lazy_refcounts=off
    qemu-img: The image size is too large for file format 'qcow2'

Note that even long before these overflow checks kick in, you get
insanely high memory usage (up to INT_MAX * sizeof(uint64_t) = 16 GB for
the L1 table), so with somewhat smaller image sizes you'll probably see
qemu aborting for a failed g_malloc().

If you need huge image sizes, you should increase the cluster size to
the maximum of 2 MB in order to get higher limits.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2cf7cfa1cd)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Kevin Wolf
573bea06b3 qcow2: Protect against some integer overflows in bdrv_check
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 0abe740f1d)
[AF: BNC#870439; rebased]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Kevin Wolf
b0f69fb75c qcow2: Fix types in qcow2_alloc_clusters and alloc_clusters_noref
In order to avoid integer overflows.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit bb572aefbd)
[AF: BNC#870439; rebased]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Kevin Wolf
3f5672ec57 qcow2: Check new refcount table size on growth
If the size becomes larger than what qcow2_open() would accept, fail the
growing operation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2b5d5953ee)
[AF: BNC#870439; rebased on report_unsupported()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
Kevin Wolf
7eb2402026 qcow2: Avoid integer overflow in get_refcount (CVE-2014-0143)
This ensures that the checks catch all invalid cluster indexes
instead of returning the refcount of a wrong cluster.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit db8a31d11d)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:42 -07:00
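A sketch of an overflow-safe index check along those lines (hypothetical names; the real get_refcount() differs):

    #include <stdint.h>
    #include <errno.h>

    /* Do the index arithmetic in 64 bits and bound-check before any array use. */
    static int check_refcount_index(uint64_t cluster_index,
                                    uint64_t refcount_table_size,
                                    unsigned refcount_block_bits)
    {
        uint64_t table_index = cluster_index >> refcount_block_bits;

        if (table_index >= refcount_table_size) {
            return -EINVAL;
        }
        return 0;
    }
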
Kevin Wolf
176d3f3351 qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)
free_cluster_index is only correct if update_refcount() was called from
an allocation function, and even there it's brittle because it's used to
protect unfinished allocations which still have a refcount of 0 - if it
moves in the wrong place, the unfinished allocation can be corrupted.

So not using it any more seems to be a good idea. Instead, use the
first requested cluster to do the calculations. Return -EAGAIN if
unfinished allocations could become invalid and let the caller restart
its search for some free clusters.

The context of creating a snapshot is one situation where
update_refcount() is called outside of a cluster allocation. For this
case, the change fixes a buffer overflow if a cluster is referenced in
an L2 table that cannot be represented by an existing refcount block.
(new_table[refcount_table_index] was out of bounds)

[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
11.
--Stefan]

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b106ad9185)
[AF: BNC#870439; dropped QCOW2_DISCARD_NEVER argument to
     qcow2_free_clusters() and update_refcount(), dropped iotests]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:41 -07:00
Kevin Wolf
da96690d12 qcow2: Fix backing file name length check
len could become negative and would then pass the check. Nothing bad
happened because bdrv_pread() happens to return an error for negative
length values, but make the size variables unsigned anyway.

This patch also changes the behaviour to error out on invalid lengths
instead of silently truncating them to 1023.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6d33e8e7dc)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
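The fix amounts to keeping the length unsigned and failing instead of truncating; a hedged sketch with assumed names:

    #include <stdint.h>
    #include <errno.h>

    /* Keep the length unsigned and fail instead of silently truncating.
     * The 1023 limit mirrors the commit message; treat it as illustrative. */
    static int check_backing_name_len(uint32_t len)
    {
        return len > 1023 ? -EINVAL : 0;
    }
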
Kevin Wolf
e3bd9029dc qcow2: Validate active L1 table offset and size (CVE-2014-0144)
This avoids an unbounded allocation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2d51c32c4b)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Kevin Wolf
b4c60b7142 qcow2: Validate snapshot table offset/size (CVE-2014-0144)
This avoids unbounded memory allocation and fixes a potential buffer
overflow on 32-bit hosts.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ce48f2f441)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Kevin Wolf
57c36784f0 qcow2: Validate refcount table offset
The end of the refcount table must not exceed INT64_MAX so that integer
overflows are avoided.

Also check for a misaligned refcount table. Such images are invalid and
probably the result of data corruption. Error out to avoid further
corruption.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 8c7de28305)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Kevin Wolf
3211c90e16 qcow2: Check refcount table size (CVE-2014-0144)
Limit the in-memory reference count table size to 8 MB; it's enough in
practice. This fixes an unbounded allocation as well as a buffer
overflow in qcow2_refcount_init().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5dab2faddc)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
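In spirit, the check caps the table before it is ever allocated; a sketch under assumed names (cluster_size is taken to be already validated as non-zero):

    #include <stdint.h>
    #include <errno.h>

    #define REFTABLE_MAX_BYTES (8 * 1024 * 1024)   /* 8 MB cap from the commit message */

    static int check_reftable_size(uint64_t reftable_clusters, uint64_t cluster_size)
    {
        if (reftable_clusters > REFTABLE_MAX_BYTES / cluster_size) {
            return -EFBIG;
        }
        return 0;
    }
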
Kevin Wolf
ca59c611d6 qcow2: Check backing_file_offset (CVE-2014-0144)
Header, header extension and the backing file name must all be stored in
the first cluster. Setting the backing file offset to a much higher value
allowed header extensions to become much bigger than we want them to be
(unbounded allocation).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a1b3955c94)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Kevin Wolf
278ffa97d6 qcow2: Check header_length (CVE-2014-0144)
This fixes an unbounded allocation for s->unknown_header_fields.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 24342f2cae)
[AF: BNC#870439; error_setg() -> unsupported_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Fam Zheng
617ae61fa9 curl: check data size before memcpy to local buffer. (CVE-2014-0144)
curl_read_cb is the callback function for libcurl when data arrives. The
data size passed in here is not guaranteed to be within the range of the
request we submitted, so we may overflow the guest IO buffer. Check the
real size we have before the memcpy to the buffer to avoid overflow.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6d4b9e55fc)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
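The general pattern is to clamp the copy length to what the destination buffer can hold; a small, self-contained sketch (not the libcurl/QEMU callback itself):

    #include <stddef.h>
    #include <string.h>

    /* Copy at most as many bytes as the destination buffer can hold. */
    static size_t copy_clamped(void *dst, size_t dst_size,
                               const void *src, size_t src_size)
    {
        size_t n = src_size < dst_size ? src_size : dst_size;

        memcpy(dst, src, n);
        return n;
    }
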
Jeff Cody
593f5a543b vdi: add bounds checks for blocks_in_image and disk_size header fields (CVE-2014-0144)
The maximum blocks_in_image is 0xffffffff / 4, which also limits the
maximum disk_size for a VDI image to 1024TB.  Note that this is the maximum
size that QEMU will currently support with this driver, not necessarily the
maximum size allowed by the image format.

This also fixes an incorrect error message, a bug introduced by commit
5b7aa9b56d (Reported by Stefan Weil)

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 63fa06dc97)
[AF: BNC#870439; changed error_setg() -> logout()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Kevin Wolf
8118864031 vpc: Validate block size (CVE-2014-0142)
This fixes some cases of division by zero crashes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5e71dfad76)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
Jeff Cody
b1fe4e34b1 vpc/vhd: add bounds check for max_table_entries and block_size (CVE-2014-0144)
This adds checks to make sure that max_table_entries and block_size
are in sane ranges.  Memory is allocated based on max_table_entries,
and block_size is used to calculate indices into that allocated
memory, so if these values are incorrect, that can lead to unbounded
memory allocation or invalid memory accesses.

Also, the allocation of the pagetable is changed from g_malloc0()
to qemu_blockalign().

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 97f1c45c6f)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:40 -07:00
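The shape of such a sanity check, with placeholder limits rather than the driver's real values:

    #include <stdint.h>
    #include <errno.h>

    /* The concrete limits here are placeholders, not the vpc driver's real ones. */
    static int check_vhd_header(uint32_t max_table_entries, uint32_t block_size)
    {
        if (block_size == 0) {
            return -EINVAL;                 /* would lead to division by zero */
        }
        if (max_table_entries > 16u * 1024 * 1024) {
            return -EFBIG;                  /* keep the pagetable allocation bounded */
        }
        return 0;
    }
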
Kevin Wolf
a3d9060d83 bochs: Fix bitmap offset calculation
32 bit truncation could let us access the wrong offset in the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a9ba36a45d)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Kevin Wolf
44309c1775 bochs: Check extent_size header field (CVE-2014-0142)
This fixes two possible division by zero crashes: In bochs_open() and in
seek_to_sector().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 8e53abbc20)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Kevin Wolf
f5a19fb649 bochs: Check catalog_size header field (CVE-2014-0143)
It should neither become negative nor allow unbounded memory
allocations. This fixes aborts in g_malloc() and an s->catalog_bitmap
buffer overflow on big endian hosts.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e3737b820b)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Kevin Wolf
1b65531708 bochs: Use unsigned variables for offsets and sizes (CVE-2014-0147)
This gets rid of integer overflows resulting in negative sizes which
aren't correctly checked.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 246f65838d)
[AF: BNC#870439; dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Kevin Wolf
cf8285b5f4 bochs: Unify header structs and make them QEMU_PACKED
This is an on-disk structure, so offsets must be accurate.

Before this patch, sizeof(bochs) != sizeof(header_v1), which makes the
memcpy() between the two invalid. We're lucky enough that the destination
buffer happened to be the larger one and the memcpy size happened to be
taken from the smaller one, so we didn't get a buffer overflow in practice.

This patch unifies both structures, eliminating the need to do a
memcpy in the first place. The common fields are extracted to the top
level of the struct and the actually differing part gets a union of the
two versions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 3dd8a6763b)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Stefan Hajnoczi
2509270b3b block/cloop: fix offsets[] size off-by-one
cloop stores the number of compressed blocks in the n_blocks header
field.  The file actually contains n_blocks + 1 offsets, where the extra
offset is the end-of-file offset.

The following line in cloop_read_block() results in an out-of-bounds
offsets[] access:

    uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];

This patch allocates and loads the extra offset so that
cloop_read_block() works correctly when the last block is accessed.

Notice that we must free s->offsets[] unconditionally now since there is
always an end-of-file offset.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 42d43d35d9)
[AF: BNC#870439; dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
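A sketch of the corrected allocation (an assumed helper, not the cloop driver's code; the guard anticipates the offsets_size checks added in the entries below):

    #include <stdint.h>
    #include <stdlib.h>

    /* The file stores n_blocks + 1 offsets; the extra one is the end-of-file
     * offset, so allocate one more entry than n_blocks. */
    static uint64_t *alloc_offsets(uint32_t n_blocks)
    {
        if (n_blocks >= UINT32_MAX / sizeof(uint64_t)) {
            return NULL;   /* bail out on absurd counts; cf. the offsets_size fixes */
        }
        return calloc(n_blocks + 1u, sizeof(uint64_t));
    }
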
Stefan Hajnoczi
9571fdcc96 block/cloop: refuse images with bogus offsets (CVE-2014-0144)
The offsets[] array allows efficient seeking and tells us the maximum
compressed data size.  If the offsets are bogus the maximum compressed
data size will be unrealistic.

This could cause g_malloc() to abort, and bogus offsets mean the image is
broken anyway. Therefore we should refuse such images.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f56b9bc3ae)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Stefan Hajnoczi
f01d1c4975 block/cloop: refuse images with huge offsets arrays (CVE-2014-0144)
Limit offsets_size to 512 MB so that:

1. g_malloc() does not abort due to an unreasonable size argument.

2. offsets_size does not overflow the bdrv_pread() int size argument.

This limit imposes a maximum image size of 16 TB at 256 KB block size.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7b103b36d6)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Stefan Hajnoczi
33fc1224b0 block/cloop: prevent offsets_size integer overflow (CVE-2014-0143)
The following integer overflow in offsets_size can lead to out-of-bounds
memory stores when n_blocks has a huge value:

    uint32_t n_blocks, offsets_size;
    [...]
    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
    [...]
    s->n_blocks = be32_to_cpu(s->n_blocks);

    /* read offsets */
    offsets_size = s->n_blocks * sizeof(uint64_t);
    s->offsets = g_malloc(offsets_size);

    [...]

    for(i=0;i<s->n_blocks;i++) {
        s->offsets[i] = be64_to_cpu(s->offsets[i]);

offsets_size can be smaller than n_blocks due to integer overflow.
Therefore s->offsets[] is too small when the for loop byteswaps offsets.

This patch refuses to open files if offsets_size would overflow.

Note that changing the type of offsets_size is not a fix since 32-bit
hosts still only have 32-bit size_t.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 509a41bab5)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
Stefan Hajnoczi
355d1697da block/cloop: validate block_size header field (CVE-2014-0144)
Avoid unbounded s->uncompressed_block memory allocation by checking that
the block_size header field has a reasonable value.  Also enforce the
assumption that the value is a non-zero multiple of 512.

These constraints conform to cloop 2.639's code so we accept existing
image files.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit d65f97a82c)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
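A minimal sketch of that validation, with an assumed upper bound:

    #include <stdint.h>
    #include <errno.h>

    #define MAX_BLOCK_SIZE (64u * 1024 * 1024)   /* illustrative upper bound only */

    /* Enforce a non-zero multiple of 512 with a sane upper bound. */
    static int check_block_size(uint32_t block_size)
    {
        if (block_size == 0 || block_size % 512 != 0 || block_size > MAX_BLOCK_SIZE) {
            return -EINVAL;
        }
        return 0;
    }
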
Asias He
721dcef81a scsi: Allocate SCSITargetReq r->buf dynamically [CVE-2013-4344]
r->buf is hardcoded to 2056 bytes, which is (256 + 1) * 8, allowing 256
LUNs at most. If more than 256 LUNs are specified by the user, we have a
buffer overflow in scsi_target_emulate_report_luns.

To fix, we allocate the buffer dynamically.

Signed-off-by: Asias He <asias@redhat.com>
Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 846424350b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
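The dynamic sizing follows the REPORT LUNS layout of an 8-byte header plus 8 bytes per LUN; a hedged sketch, not the actual SCSITargetReq code:

    #include <stdint.h>
    #include <stdlib.h>

    /* 8-byte REPORT LUNS header plus 8 bytes per LUN, sized at runtime. */
    static uint8_t *alloc_report_luns_buf(unsigned n_luns, size_t *len)
    {
        *len = 8 + (size_t)n_luns * 8;
        return calloc(1, *len);
    }
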
Gerd Hoffmann
68bdfae5e5 usb: sanity check setup_index+setup_len in post_load
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c60174e847)
[AF: BNC#864802 / CVE-2013-4541]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:39 -07:00
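A sketch of the kind of post_load sanity check meant here (parameter names are assumptions):

    #include <errno.h>

    /* Reject incoming migration state where the index could point past the
     * transferred data or the length exceeds the device's setup buffer. */
    static int setup_state_ok(int setup_len, int setup_index, int buf_size)
    {
        if (setup_len < 0 || setup_index < 0 ||
            setup_len > buf_size || setup_index > setup_len) {
            return -EINVAL;
        }
        return 0;
    }
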
Christian Borntraeger
698c02a4f7 s390-ccw.img: Fix sporadic errors with ccw boot image - initialize css
We have to set the cssid to 0, otherwise the stsch code will
return an operand exception without the m bit. In the same way
we should set m=0.

This was triggered in some cases during reboot, if for some
reason the location of blk_schid.cssid contained 1 and m was 0.
It turns out that the qemu elf loader does not zero out the bss section
on reboot.

The symptom was a dump of the old kernel with several areas
overwritten. The bootloader does not register a program check
handler, so the bios exception jumped back into the old kernel.

Let's just use a local struct with a designated initializer. That
will guarantee that all other subelements are initialized to 0.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit 5d739a4787)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Christian Borntraeger
a39e5bb368 s390-ccw.img: Fix sporadic reboot hangs: Initialize next_idx
The current code does not initialize next_idx in the virtio ring.
As the ccw bios will always use guest memory at a fixed location,
this index might be != 0 after a reboot.
Let's make the initialization explicit.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit d1028f1b5b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Cornelia Huck
3d62fd2ba0 s390/ipl: Fix waiting for virtio processing
The guest side must not manipulate the index for the used buffers. Instead,
remember the state of the used buffer locally and wait until it has moved.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit 441ea695f9)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Christian Borntraeger
342e94e056 s390/ipl: Fix spurious errors in virtio
With the ccw ipl code sometimes an error message like
"virtio: trying to map MMIO memory" or
"Guest moved used index from %u to %u" appeared. Turns out
that the ccw bios did not zero out the vring, which might
cause stale values in avail->idx and friends, especially
on reboot.

Let's zero out the relevant fields. To activate the patch we
need to rebuild s390-ccw.img as well.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-id: 1369309901-418-1-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 39c93c67c5)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Dominik Dingel
4b1e5f667f S390: BIOS boot from given device
Use the passed device; if there is no device, use the first applicable device.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ff151f4ec9)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Cornelia Huck
639373b494 s390-ccw.img: Get queue config from host.
Ask the host about the configuration instead of guessing it.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit abbbe3de4a)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Cornelia Huck
5116278b67 s390-ccw.img: Rudimentary error checking.
Try to handle at least some of the errors that may happen.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 0f3f1f302f)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Cornelia Huck
bf2d7690fa s390-ccw.img: Enhance drain_irqs().
- Use tpi + tsch to get interrupts.
- Return an error if the irb indicates problems.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 776e7f0f21)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Cornelia Huck
c6a1d6e329 s390-ccw.img: Detect devices with stsch.
stsch is the canonical way to detect devices. As a bonus, we can
abort the loop if we get cc 3, and we need to check only the valid
devices (dnv set).

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 22d67ab55a)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Christian Borntraeger
b9263558d9 s390-ccw.img: Fix compile warning in s390 ccw virtio code
Let's fix this gcc warning:

virtio.c: In function ‘vring_send_buf’:
virtio.c:125:35: error: operation on ‘vr->next_idx’ may be undefined
[-Werror=sequence-point]

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit dc03640b58)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:38 -07:00
Christian Borntraeger
669fb73c5f s390-ccw.img: Take care of the elf->img transition
We have to call strip with s390-ccw.elf as input and
s390-ccw.img as output.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 6328801f19)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Christian Borntraeger
6ec86bb006 s390-ccw.img: replace while loop with a disabled wait on s390 bios
Don't waste CPU power on an error condition. Let's stop the guest
with a disabled wait.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 7f61cbc108)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
90e32d9444 S390: ccw firmware: Add Makefile
This patch adds a makefile, so we can build our ccw firmware. Also
add the resulting binaries to .gitignore, so that nobody is annoyed
they might be in the tree.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit b462fcd57c)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
63a4d51a78 S390: ccw firmware: Add bootmap interpreter
On s390, there is an architected boot map format that we can read to
boot a certain entry off the disk. Implement a simple reader for this
that always boots the first (default) entry.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 685d49a63e)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
ddd66af037 S390: ccw firmware: Add glue header
Like all great programs, we have to call between different functions in
different object files. And all of them need a common ground of defines.

Provide a file that provides these defines.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit c9c39d3b5e)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
603c7238ff S390: ccw firmware: Add virtio device drivers
In order to boot, we need to be able to access a virtio-blk device through
the CCW bus. Implement support for this.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 1e17c2c15b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
2b87f79326 S390: ccw firmware: Add sclp output
In order to communicate with the user, we need an I/O mechanism that he
can read. Implement SCLP ASCII support, which happens to be the default
in the s390 ccw machine.

This file is missing read support for now. It can only print messages.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 0369b2eb07)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
3e812e702d S390: ccw firmware: Add main program
This C file is the main driving piece of the s390 ccw firmware. It
searches for a workable block device, sets it as the default to boot
from and boots from it.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 92f2ca38b0)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Alexander Graf
05061c843c S390: ccw firmware: Add start assembly
We want to write most of our code in C, so add a small assembly
stub that jumps straight into C code for us to continue booting.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 80fea6e893)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-05 08:18:37 -07:00
Bruce Rogers
3dc60f68ae vnc: provide fake color map
Our current VNC code does not handle color maps (aka non-true-color) at all
and aborts if a client requests them. There are 2 major issues with this:

 1) A VNC viewer on an 8-bit X11 system may request color maps
 2) RealVNC _always_ starts requesting color maps, then moves on to full color

In order to support these 2 use cases, let's just create a fake color map
that covers exactly our normal true color 8 bit color space. That way we don't
lose anything over a client that wants true color.

Reported-by: Sascha Wehnert <swehnert@suse.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-02-05 08:18:37 -07:00
Andreas Färber
8cfd98d3ae acpi_piix4: Fix migration from SLE11 SP2
qemu-kvm 0.15 uses the same GPE format as qemu 1.4, but as version 2
rather than 3.

Addresses part of BNC#812836.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-10-07 15:29:55 +02:00
Andreas Färber
a681202d4e i8254: Fix migration from SP2
qemu-kvm 0.15 had a VMSTATE_UINT32(flags, PITState) field that
qemu 1.4 does not have.

Addresses part of BNC#812836.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-10-07 15:29:49 +02:00
Andreas Färber
e3401412dc vga: Raise VRAM to 16 MiB for pc-0.15 and below
qemu-kvm.git commit a7fe0297840908a4fd65a1cf742481ccd45960eb
(Extend vram size to 16MB) deviated from qemu.git since kvm-61, and only
in commit 9e56edcf8d (vga: raise default
vgamem size) did qemu.git adjust the VRAM size for v1.2.

Add compatibility properties so that up to and including pc-0.15 we
maintain migration compatibility with qemu-kvm rather than QEMU and
from pc-1.0 on with QEMU (last qemu-kvm release was 1.2).

Addresses part of BNC#812836.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-10-07 14:17:29 +02:00
Jan Kiszka
fd5534832f pcnet: Flush queued packets on end of STOP state
Analogously to other NICs, we have to inform the network layer when
the can_receive handler will no longer report 0. Without this, we may
get stuck waiting on queued incoming packets.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ee76c1f821)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-10-07 14:16:14 +02:00
Stefan Hajnoczi
4c739e0d6d rtl8139: flush queued packets when RxBufPtr is written
Net queues support efficient "receive disable".  For example, tap's file
descriptor will not be polled while its peer has receive disabled.  This
avoids wasting CPU cycles on needlessly copying and then dropping packets
which the peer cannot receive.

rtl8139 is missing the qemu_flush_queued_packets() call that wakes the
queue up when receive becomes possible again.

As a result, the Windows 7 guest driver reaches a state where the
rtl8139 cannot receive packets.  The driver has actually refilled the
receive buffer but we never resume reception.

The bug can be reproduced by running a large FTP 'get' inside a Windows
7 guest:

  $ qemu -netdev tap,id=tap0,...
         -device rtl8139,netdev=tap0

The Linux guest driver does not trigger the bug, probably due to a
different buffer management strategy.

Reported-by: Oliver Francke <oliver.francke@filoo.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 00b7ade807)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-09-24 17:04:20 +02:00
Christian Borntraeger
300a4123d8 s390/ipl: Fix boot order
The latest ipl code adaptations collided with some of the virtio
refactoring rework. This resulted in always booting the first
disk. Let's fix booting from a given ID.
The new code also checks for command lines without bootindex to
avoid random behaviour when accessing dev_st (==0).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 5c8ded6ef5)

[AF: virtio refactoring not applicable, revert dev_st cast change]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-20 11:55:53 +02:00
Cornelia Huck
1ec1ef8f31 s390x/css: Fix concurrent sense.
Fix an off-by-one error when indicating availability of concurrent
sense data.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit 8312976e73)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Cornelia Huck
9a716057cd virtio-ccw: Fix unsetting of indicators.
Interpretation of the ccws to register (configuration) indicators contained
a thinko: We want to disallow reading from 0, but setting the indicator
pointer to 0 is fine.

Let's fix the handling for CCW_CMD_SET{,_CONF}_IND.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit d1db1fa8df)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Christian Borntraeger
3fc0198478 s390/virtio-ccw: Fix virtio reset
On virtio reset we must reset the indicator to avoid stale interrupts,
e.g. after a reset.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Dominik Dingel
7ece92f464 S390: Add virtio-blk boot
If no kernel IPL entry is specified, boot the bios and pass, if available,
device information for the first boot device (as given by the boot index).

The provided information will be used in the next commit from the BIOS.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ba1509c0a9)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Dominik Dingel
d0ccd870a6 Common: Add quick access to first boot device
Instead of manually parsing the boot_list as a character stream,
we can access the nth boot device, specified by the position in the
boot order.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 7dc5af5545)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Dominik Dingel
46fbe7a783 S390: Merging s390_ipl_cpu and s390_ipl_reset
There is no use in having this split into two functions.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 2c4c71ee3a)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Dominik Dingel
383f92e67d S390: BIOS check for file
Add a check that the BIOS blob exists before trying to load it.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 1f7de85330)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Alexander Graf
383017b9bd S390: CCW: Use new, working firmware by default
Since we now have working firmware for s390-ccw in the tree, we can
default to it on our s390-ccw machine, rendering it more useful.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ba747cc8f3)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Alexander Graf
4bbf4e9d8e S390: IPL: Use different firmware for different machines
We have a virtio-s390 and a virtio-ccw machine in QEMU. Both use vastly
different ways to do I/O. Having the same firmware blob for both doesn't
really make any sense.

Instead, let's parametrize the firmware file name, so that we can have
different blobs for different machines.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit d0249ce5a8)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Alexander Graf
e56ac60924 S390: IPL: Support ELF firmware
Our firmware blob is always a raw file that we load at a fixed address today.
Support loading an ELF blob instead that we can map high up in memory.

This way we don't have to be so conscious about size constraints.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 3325995640)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Alexander Graf
57e3bf66c2 S390: Make IPL reset address dynamic
We can have different load addresses for different blobs we boot with.
Make the reset IP dynamic, so that we can handle things more flexibly.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 74ad2d22c1)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-08-01 14:57:48 +02:00
Alexander Graf
50c6b143ff Dictzip: Compile in block bucket, so qemu-img gets support as well
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-07-31 14:22:06 +02:00
Alexander Graf
61c8e3c532 Dictzip: Fix potential endless loop
Before, we reserved streams on request submission, going into a potential
endless loop if we run more than 4 parallel requests. There's no need to.
The decoding callback is synchronized already.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-07-31 14:22:06 +02:00
Alexander Graf
1380543027 Dictzip: Fix endianness issues
Yikes - when running on big endian systems, we had some serious bugs
exposed. This patch gets us rolling on those again.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-07-31 14:21:46 +02:00
Tim Hardeck
53b7a865ee TLS support for VNC Websockets
Added TLS support to the VNC QEMU Websockets implementation.
VNC-TLS needs to be enabled for this feature to be used.

The required certificates are specified, as in the case of VNC-TLS,
with the VNC parameter "x509=<path>".

If the server certificate isn't signed by a root authority, it needs to
be manually imported in the browser, because at least in the case of Firefox
and Chrome there is no user dialog; the connection just gets canceled.

As a side note, VEncrypt over Websocket doesn't work at the moment because
TLS can't be stacked in the current implementation (it also didn't work
before). Nevertheless, to my knowledge there is no HTML5 VNC client which
supports it, and the Websocket connection can be encrypted with regular TLS
now, so it should be fine for most use cases.

Signed-off-by: Tim Hardeck <thardeck@suse.de>
Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Message-id: 1366727581-5772-1-git-send-email-thardeck@suse.de
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 0057a0d590)

[BNC#821819 / FATE#315032]
Signed-off-by: Tim Hardeck <thardeck@suse.de>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-07-25 15:42:10 +02:00
Bruce Rogers
ed32292df6 increase x86_64 physical bits to 42
Allow for guests with higher amounts of RAM. The current thought
is that 2TB specified on the qemu command line would be an appropriate
limit. Note that this requires the next higher bit value since
the highest address is actually more than 2TB due to the PCI
memory hole.

Signed-off-by: Bruce Rogers <brogers@suse.com>
2013-06-12 16:23:48 +02:00
Jan Kiszka
a9692822d8 target-i386: Improve -cpu ? features output
We were missing a bunch of feature lists. Fix this by simply dumping
the meta list feature_word_info.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 3af60be28c)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-06-12 16:23:07 +02:00
Jan Kiszka
c173bbff64 target-i386: Fix including "host" in -cpu ? output
kvm_enabled() cannot be true at this point because accelerators are
initialized much later during init. Also, hiding this makes it very hard
to discover for users. Simply dump unconditionally if CONFIG_KVM is set.

Add explanation for "host" CPU type.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 21ad77892d)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-06-12 16:23:07 +02:00
Cornelia Huck
230c029d16 virtio-ccw: Wire up virtio-rng.
Make virtio-rng devices available for s390-ccw-virtio machines.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-06-12 16:23:07 +02:00
Stefan Berger
1e5e5a10c3 rng-random: Use qemu_open / qemu_close
In the rng backend use qemu_open and qemu_close rather than POSIX
open/close.

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-06-12 16:23:07 +02:00
Alexander Graf
e57cc5a2f9 s390: Remove legacy s390-virtio machine type
We don't want to confuse users by offering the legacy, broken
machine type -M s390-virtio. Let's just not include the machine
description in the first place.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:07 +02:00
Alexander Graf
f9cea35a81 s390: Default virtio-blk to ccw
When spawning a drive with -drive if=virtio on s390x, we want to
create a ccw device by default, as that is our default machine.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:07 +02:00
Alexander Graf
0ae2b92b29 s390: Update s390-zipl.rom with a ccw capable version
Source compiled from commit 3c3828f74e11 of:

  git://repo.or.cz/s390-tools.git virtio-ccw-zipl

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:07 +02:00
Bruce Rogers
a4d378b1ce Add syscalls to white list which allow sdl output
Signed-off-by: Bruce Rogers <brogers@suse.com>
2013-06-12 16:23:07 +02:00
Alexander Graf
de8526eefe s390: restrict early printk to legacy s390 machine
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:07 +02:00
Alexander Graf
3ab66401c3 Legacy Patch kvm-qemu-preXX-report-default-mac-used.patch 2013-06-12 16:23:06 +02:00
Bruce Rogers
f6af7357df s390: set s390-ccw as the default machine type for s390
Signed-off-by: Bruce Rogers <brogers@suse.com>
2013-06-12 16:23:06 +02:00
Alexander Graf
c8af02494d s390: default virtio aliases to ccw bus
When running with the s390-ccw machine, we need to make sure we
spawn virtio-ccw devices for -net and -drive. Change the aliases
accordingly.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:06 +02:00
Alexander Graf
1bb261f222 Legacy Patch kvm-studio-vnc.patch 2013-06-12 16:23:06 +02:00
Alexander Graf
f0922ef574 Legacy Patch kvm-studio-slirp-nooutgoing.patch 2013-06-12 16:23:06 +02:00
Alexander Graf
a6aa2f9c54 Make char muxer more robust wrt small FIFOs
Virtio-Console can only process one character at a time. Using it on S390
gave me strange "lags" where I got the previously pressed character when
pressing a new one. So I typed in "abc" and only received "a", then pressed
"d" but the guest received "b" and so on.

While the stdio driver calls a poll function that just processes on its
queue in case virtio-console can't take multiple characters at once, the
muxer does not have such callbacks, so it can't empty its queue.

To work around that limitation, I introduced a new timer that only gets
active when the guest can not receive any more characters. In that case
it polls again after a while to check if the guest is now receiving input.

This patch fixes input when using -nographic on s390 for me.
2013-06-12 16:23:06 +02:00
Alexander Graf
bf5b9c24c5 Implement early printk in virtio-console
On our S390x Virtio machine we don't have anywhere to display early printks
on, because we don't know about VGA or serial ports.

So instead we just forward everything to the virtio console that we created
anyways.

Signed-off-by: Alexander Graf <agraf@suse.de>

Conflicts:

	hw/s390-virtio.c
2013-06-12 16:23:06 +02:00
Alexander Graf
63d9acbe99 Legacy Patch kvm-qemu-tweak-sandboxing-syscall-whitelist.patch 2013-06-12 16:23:06 +02:00
Andreas Färber
632f4958a0 Raise soft address space limit to hard limit
For SLES we want users to be able to use large memory configurations
with KVM without fiddling with ulimit -Sv.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-06-12 16:23:06 +02:00
Alexander Graf
66db770b81 console: add question-mark escape operator
Some termcaps (found using SLES11SP1) use [? sequences. According to man
console_codes (http://linux.die.net/man/4/console_codes) the question mark
is a nop and should simply be ignored.

This patch does exactly that, rendering screen output readable when
outputting guest serial consoles to the graphical console emulator.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:05 +02:00
Alexander Graf
363b2fc4f0 Legacy Patch kvm-qemu-preXX-dictzip3.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
287b7249b6 Add tar container format
Tar is a very widely used format to store data in. Sometimes people even put
virtual machine images in there.

So it makes sense for qemu to be able to read from tar files. I implemented
a reader written from scratch that also knows about the GNU sparse format,
which is what pigz creates.

This version checks for filenames that end in well-known extensions. The logic
could be changed to search for filenames given on the command line, but that
would require changes to more parts of qemu.

The tar reader in conjunction with dzip gives us the chance to download
tar'ed up virtual machine images (even via http) and instantly make use of
them.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Bruce Rogers <brogers@novell.com>

Conflicts:

	block/Makefile.objs
2013-06-12 16:23:05 +02:00
Alexander Graf
ea55d53b1b Add support for DictZip enabled gzip files
DictZip is an extension to the gzip format that allows random seeks in gzip
compressed files by cutting the file into pieces and storing the piece offsets
in the "extra" header of the gzip format.

Thanks to that extension, we can use gzip compressed files as a block backend,
though only in read mode.

This makes a lot of sense when stacked with tar files that can then be shipped
to VM users. If a VM image is inside a tar file that is inside a DictZip
enabled gzip file, the user can run the tar.gz file as is without having to
extract the image first.

Tar patch follows.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Bruce Rogers <brogers@novell.com>

Conflicts:

	block/Makefile.objs
2013-06-12 16:23:05 +02:00
Alexander Graf
8e4eb52196 Legacy Patch kvm-qemu-make-rtl8139-default-nic.patch
We need this, but perhaps we can drop it in SLES 12.
2013-06-12 16:23:05 +02:00
Alexander Graf
5efb9ade7e Legacy Patch kvm-qemu-enable-kvm-acceleration.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
73aab0ebf2 Legacy Patch kvm-qemu-avoid-redunant-declaration-error.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
bad6f6bc3f Legacy Patch kvm-qemu-provide-__u64-for-broken-sys-capability-h.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
e536419507 Legacy Patch kvm-qemu-avoid-deprecated-gnutls-types.patch 2013-06-12 16:23:04 +02:00
Alexander Graf
1bb6f0527b Legacy Patch kvm-qemu-madvise-DONTFORK-for-tight-memory-migration.patch 2013-06-12 16:23:04 +02:00
Alexander Graf
a45b1c7069 Legacy Patch kvm-qemu-default-memsize.patch 2013-06-12 16:23:04 +02:00
Alexander Graf
827326be7b Legacy Patch qemu-datadir.diff 2013-06-12 16:23:04 +02:00
Michael Roth
89400a80f5 update VERSION for 1.4.2
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-23 17:12:44 -05:00
Hervé Poussineau
e85b521519 ppc: do not register IABR SPR twice for 603e
IABR SPR is already registered in gen_spr_603(), called from init_proc_603E().

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-20 16:30:36 -05:00
Aneesh Kumar K.V
f890185392 hw/9pfs: use O_NOFOLLOW for mapped readlink operation
With mapped security models like mapped-xattr and mapped-file, we save the
symlink target as file contents. Now if we ever expose a normal directory
with a mapped security model and find real symlinks in the export path, never
follow them and return a proper error.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-20 16:23:43 -05:00
Aneesh Kumar K.V
745f6c0ef7 hw/9pfs: Fix segfault with 9p2000.u
When the guest tries to chmod a block or char device file over 9pfs,
the qemu process segfaults. With the 9p2000.u protocol we use wstat to
change mode bits and the client doesn't send extension information for
chmod. We need to check the size field to determine whether extension
info is present or not.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Acked-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-20 11:25:00 -05:00
Josh Durgin
0182df5ae5 rbd: add an asynchronous flush
The existing bdrv_co_flush_to_disk implementation uses rbd_flush(),
which is synchronous and causes the main qemu thread to block until it
is complete. This results in unresponsiveness and extra latency for
the guest.

Fix this by using an asynchronous version of flush.  This was added to
librbd with a special #define to indicate its presence, since it will
be backported to stable versions. Thus, there is no need to check the
version of librbd.

Implement this as bdrv_aio_flush, since it matches other aio functions
in the rbd block driver, and leave out bdrv_co_flush_to_disk when the
asynchronous version is available.

Reported-by: Oliver Francke <oliver@filoo.de>
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit dc7588c1eb)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-17 15:52:55 -05:00
Paolo Bonzini
7f28f0f1f6 qemu-iotests: add tests for rebasing zero clusters
If zero clusters are erroneously treated as unallocated, "qemu-img rebase"
will copy the backing file's contents onto the cluster.

The bug existed also in image streaming, but since the root cause was in
qcow2's is_allocated implementation it is enough to test it with qemu-img.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit acbf30ec60)

Conflicts:

	tests/qemu-iotests/group

* fixed up to account for tests 48/49 being missing from 1.4

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-17 13:10:52 -05:00
Luiz Capitulino
45bbe1fa89 virtio-balloon: fix integer overflow in BALLOON_CHANGE QMP event
Because dev->actual is uint32_t, the expression 'dev->actual <<
VIRTIO_BALLOON_PFN_SHIFT' is truncated to 32 bits. This overflows when
dev->actual >= 1048576.

To reproduce:

 1. Start a VM with a QMP socket and 5G of RAM
 2. Connect to the QMP socket, negotiate capabilities and issue:

   { "execute":"balloon", "arguments": { "value": 1073741824 } }

 3. Watch for BALLOON_CHANGE QMP events, the last one will incorrectly be:

   { "timestamp": { "seconds": 1366228965, "microseconds": 245466 },
     "event": "BALLOON_CHANGE", "data": { "actual": 5368709120 } }

To fix it this commit casts it to ram_addr_t, which is ram_size's type.

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit dcc6ceffc0)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-17 12:02:18 -05:00
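
The truncation described above can be reproduced outside QEMU with a few
lines of standalone C (a rough sketch; the shift constant below is defined
locally and the page count is just an example, not QEMU's actual state):

    #include <stdint.h>
    #include <stdio.h>

    #define VIRTIO_BALLOON_PFN_SHIFT 12     /* 4 KiB pages */

    int main(void)
    {
        uint32_t actual = 1310720;          /* 5 GiB worth of 4 KiB pages */

        /* Shift evaluated in 32 bits: wraps for actual >= 1048576. */
        uint64_t wrong = actual << VIRTIO_BALLOON_PFN_SHIFT;

        /* Widen first (the fix casts to ram_addr_t in QEMU). */
        uint64_t right = (uint64_t)actual << VIRTIO_BALLOON_PFN_SHIFT;

        printf("wrong=%llu right=%llu\n",
               (unsigned long long)wrong, (unsigned long long)right);
        return 0;
    }
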
Paolo Bonzini
06efdc4f4d qemu-timer: move timeBeginPeriod/timeEndPeriod to os-win32
These are needed for any of the Win32 alarm timer implementations.
They are not tied to mmtimer exclusively.

Jacob tested this patch with both mmtimer and Win32 timers.

Cc: qemu-stable@nongnu.org
Tested-by: Jacob Kroon <jacob.kroon@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
(cherry picked from commit 0727b86754)

Conflicts:

	os-win32.c

* updated to retain cpu affinity settings for 1.4

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 17:22:34 -05:00
Brad Smith
0c70b5ad59 configure: Don't fall back to gthread coroutine backend
This is a back port of 7c2acc7062 to the
1.4 stable branch without needing the new error_exit() function.

configure: Don't fall back to gthread coroutine backend

The gthread coroutine backend is broken and does not produce a working
QEMU; it is only useful for some very limited debugging situations.
Clean up the backend selection logic in configure so that it now runs
"if on windows use windows; else prefer ucontext; else sigaltstack".

To do this we refactor the configure code to separate out "test
whether we have a working ucontext", "pick a default if user didn't
specify" and "validate that user didn't specify something invalid",
rather than having all three of these run together. We also simplify
the Makefile logic so it just links in the backend the configure
script selects.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1365419487-19867-3-git-send-email-peter.maydell@linaro.org
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Brad Smith <brad@comstyle.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 14:35:48 -05:00
Hans de Goede
b90fd157f7 usb-redir: Fix crash on migration with no client connected
If no client is connected on the src side, then we won't receive a
parser during migration; in this case usbredir_post_load() should be a nop
rather than trying to dereference the NULL dev->parser pointer.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 3713e1485e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 12:06:36 -05:00
Cole Robinson
7322cb17fa docs: Fix generating qemu-doc.html with texinfo 5
LC_ALL=C makeinfo --no-headers --no-split --number-sections --html qemu-doc.texi -o qemu-doc.html
./qemu-options.texi:1521: unknown command `list'
./qemu-options.texi:1521: table requires an argument: the formatter for @item
./qemu-options.texi:1521: warning: @table has text but no @item

This is for 1.4 stable only; master isn't affected, as it was fixed by
another commit (which isn't appropriate for stable):

commit 5d6768e3b8
Author: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Date:   Fri Feb 22 12:39:51 2013 +0900

    sheepdog: accept URIs

Signed-off-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 12:04:13 -05:00
Laszlo Ersek
1d7723ffc7 qga: unlink just created guest-file if fchmod() or fdopen() fails on it
We shouldn't allow guest filesystem pollution on error paths.

Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
(cherry picked from commit 2b72001806)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 16:18:25 -05:00
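
A standalone sketch of the error-path shape (not qemu-ga's actual
open/fchmod/fdopen sequence; path and mode below are illustrative): if a
step after creating the file fails, the just-created file is removed again
so the guest filesystem is not polluted.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static FILE *open_new_file(const char *path)
    {
        int fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0600);
        if (fd < 0) {
            return NULL;
        }
        FILE *fh = fdopen(fd, "w");
        if (!fh) {
            close(fd);
            unlink(path);       /* undo the creation on the error path */
            return NULL;
        }
        return fh;
    }

    int main(void)
    {
        FILE *fh = open_new_file("/tmp/example-new-file");
        if (fh) {
            fclose(fh);
        }
        return 0;
    }
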
Laszlo Ersek
67b460a404 qga: distinguish binary modes in "guest_file_open_modes" map
In Windows guests this may make a difference.

Since the original patch (commit c689b4f1) sought to be pedantic and to
consider theoretical corner cases of portability, we should fix it up
where it failed to come through in that pursuit.

Suggested-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
(cherry picked from commit 8fe6bbca71)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 16:18:15 -05:00
Peter Maydell
84247bbe28 translate-all.c: Remove cpu_unlink_tb()
The (unsafe) function cpu_unlink_tb() is now unused, so we can simply
remove it and any code that was only used by it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 3a808cc407)

Conflicts:
	translate-all.c

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:48:38 -05:00
Peter Maydell
2ebcc590c9 Handle CPU interrupts by inline checking of a flag
Fix some of the nasty TCG race conditions and crashes by implementing
cpu_exit() as setting a flag which is checked at the start of each TB.
This avoids crashes if a thread or signal handler calls cpu_exit()
while the execution thread is itself modifying the TB graph (which
may happen in system emulation mode as well as in linux-user mode
with a multithreaded guest binary).

This fixes the crashes seen in LP:668799; however there are another
class of crashes described in LP:1098729 which stem from the fact
that in linux-user with a multithreaded guest all threads will
use and modify the same global TCG data structures (including the
generated code buffer) without any kind of locking. This means that
multithreaded guest binaries are still in the "unsupported"
category.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 378df4b237)

Conflicts:
	exec.c
	include/qom/cpu.h
	translate-all.c
	include/exec/gen-icount.h

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>

Conflicts:
	cpu-exec.c

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:48:21 -05:00
Peter Maydell
69001b3145 cpu-exec: wrap tcg_qemu_tb_exec() in a fn to restore the PC
If tcg_qemu_tb_exec() returns a value whose low bits don't indicate a
link to an indexed next TB, this means that the TB execution never
started (eg because the instruction counter hit zero).  In this case the
guest PC has to be reset to the address of the start of the TB.
Refactor the cpu-exec code to make all tcg_qemu_tb_exec() calls pass
through a wrapper function which does this restoration if necessary.

Note that the apparent change in cpu_exec_nocache() from calling
cpu_pc_from_tb() with the old TB to calling it with the TB returned by
do_tcg_qemu_tb_exec() is safe, because in the nocache case we can
guarantee that the TB we try to execute is not linked to any others,
so the only possible returned TB is the one we started at. That is,
we should arguably previously have included in cpu_exec_nocache() an
assert((next_tb & ~TB_EXIT_MASK) == tb), since the API requires restore
from next_tb but we were using tb.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 77211379d7)

Conflicts:
	cpu-exec.c

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:48:14 -05:00
Peter Maydell
3accab7365 tcg: Document tcg_qemu_tb_exec() and provide constants for low bit uses
Document tcg_qemu_tb_exec(). In particular, its return value is a
combination of a pointer to the next translation block and some
extra information in the low two bits. Provide some #defines for
the values passed in these bits to improve code clarity.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 0980011b4f)

Conflicts:
	tcg/tcg.h

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:47:53 -05:00
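
The "pointer plus flags in the low bits" convention being documented can be
sketched generically like this (illustrative names only, not QEMU's
tcg_qemu_tb_exec() or its TB_EXIT_* constants):

    #include <assert.h>
    #include <stdint.h>

    #define EXIT_MASK 3u            /* the low two bits carry extra info */

    static uintptr_t pack(void *next_tb, unsigned exit_code)
    {
        assert(((uintptr_t)next_tb & EXIT_MASK) == 0);  /* needs alignment */
        assert(exit_code <= EXIT_MASK);
        return (uintptr_t)next_tb | exit_code;
    }

    static void *ptr_part(uintptr_t v)
    {
        return (void *)(v & ~(uintptr_t)EXIT_MASK);
    }

    static unsigned code_part(uintptr_t v)
    {
        return (unsigned)(v & EXIT_MASK);
    }

    int main(void)
    {
        static int dummy_tb[4];     /* stands in for a translation block */
        uintptr_t v = pack(dummy_tb, 2);
        return (ptr_part(v) == dummy_tb && code_part(v) == 2) ? 0 : 1;
    }
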
Laszlo Ersek
60259539ee qga: set umask 0077 when daemonizing (CVE-2013-2007)
The qemu guest agent creates a bunch of files with insecure permissions
when started in daemon mode. For example:

  -rw-rw-rw- 1 root root /var/log/qemu-ga.log
  -rw-rw-rw- 1 root root /var/run/qga.state
  -rw-rw-rw- 1 root root /var/log/qga-fsfreeze-hook.log

In addition, at least all files created with the "guest-file-open" QMP
command, and all files created with shell output redirection (or
otherwise) by utilities invoked by the fsfreeze hook script are affected.

For now mask all file mode bits for "group" and "others" in
become_daemon().

Temporarily, for compatibility reasons, stick with the 0666 file-mode in
case of files newly created by the "guest-file-open" QMP call. Do so
without changing the umask temporarily.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit c689b4f1ba)

Conflicts:

	qga/commands-posix.c

*update includes to match stable

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 13:30:33 -05:00
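
A minimal sketch of the hardening, using only standard POSIX calls (this is
not qemu-ga's become_daemon(); the log path is made up): masking the
group/other permission bits once at startup means files the daemon creates
afterwards come out 0600 instead of 0666.

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
        umask(S_IRWXG | S_IRWXO);               /* i.e. umask 0077 */

        FILE *log = fopen("/tmp/example-daemon.log", "w");  /* now 0600 */
        if (log) {
            fputs("started\n", log);
            fclose(log);
        }
        return 0;
    }
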
Aurelien Jarno
93399d0827 tcg/optimize: fix setcond2 optimization
When setcond2 is rewritten into setcond, the state of the destination
temp should be reset, so that a copy of the previous value is not
used instead of the result.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 66e61b55f1)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 13:03:45 -05:00
Richard Sandiford
074dd56a01 target-mips: Fix accumulator arguments to gen_helper_dmult(u)
gen_muldiv was passing int accumulator arguments directly
to gen_helper_dmult(u).  This patch fixes it to use TCGs,
via the gen_helper_0e2i wrapper.

Fixes an --enable-debug-tcg build failure reported by Juergen Lock.

Signed-off-by: Richard Sandiford <rdsandiford@googlemail.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 13:01:34 -05:00
Andreas Färber
d10d2510b9 configure: Pick up libseccomp include path
openSUSE 12.3 has seccomp.h in /usr/include/libseccomp-1.0.1,
so add `pkg-config --cflags libseccomp` output to QEMU_CFLAGS.

Cc: qemu-stable@nongnu.org
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 372e47e9b5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 05:30:53 -05:00
Cornelia Huck
5613bda4ac virtio-ccw: Check indicators location.
If a guest neglected to register (secondary) indicators but still runs
with notifications enabled, we might end up writing to guest zero;
avoid this by checking for valid indicators and only writing to the
guest and generating an interrupt if indicators have been setup.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit 7c4869761d)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:53:19 -05:00
Jason Wang
c5675a98bb tap: properly initialize vhostfds
Only tap->vhostfd was checked in net_init_tap_one(); tap->vhostfds was
forgotten. This leads qemu to ignore all fds passed by management through
vhostfds and to try to create the vhost_net device itself. Fix by adding
this check as well.

Reported-by: Michal Privoznik <mprivozn@redhat.com>
Cc: Michal Privoznik <mprivozn@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7873df408d)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:52:06 -05:00
Amit Shah
e355efd962 rng random backend: check for -EAGAIN errors on read
Not handling EAGAIN triggers the assert

qemu/backends/rng-random.c:44:entropy_available: assertion failed: (len != -1)
Aborted (core dumped)

This happens when starting a guest with '-device virtio-rng-pci',
issuing a 'cat /dev/hwrng' in the guest, while also doing 'cat
/dev/random' on the host.

Reported-by: yunpingzheng <yunzheng@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Message-id: eacda84dfaf2d99cf6d250b678be4e4d6c2088fb.1366108096.git.amit.shah@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit acbbc03661)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:50:35 -05:00
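
The EAGAIN case can be shown with a plain POSIX read (a sketch, not the QEMU
rng-random backend; buffer size and device path are arbitrary): on a
non-blocking fd, read() returning -1 with errno == EAGAIN just means "no
data yet" and must not be treated as a failure.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static void entropy_available(int fd)
    {
        char buffer[64];
        ssize_t len = read(fd, buffer, sizeof(buffer));
        if (len < 0 && errno == EAGAIN) {
            return;             /* not an error: try again later */
        }
        if (len < 0) {
            perror("read");
            return;
        }
        printf("got %zd bytes of entropy\n", len);
    }

    int main(void)
    {
        int fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
        if (fd < 0) {
            perror("open");
            return 1;
        }
        entropy_available(fd);
        close(fd);
        return 0;
    }
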
Andreas Färber
4d7f4556fc qdev: Fix QOM unrealize behavior
Since commit 249d41720b (qdev: Prepare
"realized" property) setting realized = true would register the device's
VMStateDescription, but realized = false would not unregister it. Fix that.

Moving the code from unparenting also revealed that we were calling
DeviceClass::init through DeviceClass::realize as interim solution but
DeviceClass::exit still at unparenting time with a realized check.
Make this symmetrical by implementing DeviceClass::unrealize to call it,
while we're setting realized = false in the unparenting path.
The only other unrealize user is mac_nvram, which can safely override it.

Thus, mark DeviceClass::exit as obsolete, new devices should implement
DeviceClass::unrealize instead.

Cc: qemu-stable@nongnu.org
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Andreas Färber <afaerber@suse.de>
Message-id: 1366043650-9719-1-git-send-email-afaerber@suse.de
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit fe6c211781)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:48:56 -05:00
Stefan Hajnoczi
0486c27a36 nbd: unlock mutex in nbd_co_send_request() error path
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6760c47aa4)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:47:07 -05:00
Michael Roth
57105f7480 update VERSION for 1.4.1
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-15 14:18:25 -05:00
Daniel P. Berrange
6e8865313f Add -f FMT / --format FMT arg to qemu-nbd
Currently the qemu-nbd program will auto-detect the format of
any disk it is given. This behaviour is known to be insecure.
For example, if qemu-nbd initially exposes a 'raw' file to an
unprivileged app, and that app runs

   'qemu-img create -f qcow2 -o backing_file=/etc/shadow /dev/nbd0'

then the next time the app is started, the qemu-nbd will now
detect it as a 'qcow2' file and expose /etc/shadow to the
unprivileged app.

The only way to avoid this is to explicitly tell qemu-nbd what
disk format to use on the command line, completely disabling
auto-detection. This patch adds a '-f' / '--format' arg for
this purpose, mirroring what is already available via qemu-img
and qemu commands.

  qemu-nbd --format raw -p 9000 evil.img

will now always use raw, regardless of what format 'evil.img'
looks like it contains.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
[Use errx, not err. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

*fixed conflict due to bdrv_open() not supporting "options" param
in v1.4.1

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-09 10:00:20 -05:00
Richard Sandiford
6d0b135a98 target-mips: Fix accumulator selection for MIPS16 and microMIPS
Add accumulator arguments to gen_HILO and gen_muldiv, rather than
extracting the accumulator directly from ctx->opcode.  The extraction
was only right for the standard encoding: MIPS16 doesn't have access
to the DSP registers, while microMIPS encodes the accumulator register
in a different field (bits 14 and 15).

Passing the accumulator register is probably an over-generalisation
for division and 64-bit multiplication, which never access anything
other than HI and LO, and which always pass 0 as the new argument.
Separating them felt a bit fussy though.

Signed-off-by: Richard Sandiford <rdsandiford@googlemail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 26135ead80)

Conflicts:
	target-mips/translate.c

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-09 09:59:17 -05:00
Brad Smith
d89f9ba43b Allow clock_gettime() monotonic clock to be utilized on more OS's
Allow the clock_gettime() code using monotonic clock to be utilized on
more POSIX compliannt OS's. This started as a fix for OpenBSD which was
listed in one function as part of the previous hard coded list of OS's
for the functions to support but not in the other.

Signed-off-by: Brad Smith <brad@comstyle.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20130405003748.GH884@rox.home.comstyle.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit d05ef16045)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-06 16:38:15 -05:00
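
The portable pattern in question looks roughly like this (a standalone
sketch, not the QEMU code touched by the commit; older glibc may need -lrt
for clock_gettime()):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/time.h>
    #include <time.h>

    static int64_t get_clock_ns(void)
    {
    #if defined(CLOCK_MONOTONIC)
        struct timespec ts;
        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
            return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
        }
    #endif
        /* Fallback: wall clock, which can jump backwards. */
        struct timeval tv;
        gettimeofday(&tv, NULL);
        return (int64_t)tv.tv_sec * 1000000000LL + tv.tv_usec * 1000LL;
    }

    int main(void)
    {
        printf("%lld ns\n", (long long)get_clock_ns());
        return 0;
    }
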
Eduardo Habkost
46f9071a23 target-i386: Check for host features before filter_features_for_kvm()
commit 5ec01c2e96 broke "-cpu ..,enforce",
as it has moved kvm_check_features_against_host() after the
filter_features_for_kvm() call. filter_features_for_kvm() removes all
features not supported by the host, so this effectively made
kvm_check_features_against_host() impossible to fail.

This patch changes the call so we check for host feature support before
filtering the feature bits.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-id: 1364935692-24004-1-git-send-email-ehabkost@redhat.com
Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit a509d632c8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-05 14:01:33 -05:00
Jason Wang
f85e082a36 help: add docs for missing 'queues' option of tap
Cc: Markus Armbruster <armbru@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-id: 1361545072-30426-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit ec3960148f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-05 13:57:17 -05:00
Paolo Bonzini
da78a1bc7a compiler: fix warning with GCC 4.8.0
GCC 4.8.0 introduces a new warning:

    block/qcow2-snapshot.c: In function 'qcow2_write_snapshots':
    block/qcow2-snapshot.c:252:18: error: typedef 'qemu_build_bug_on__253'
              locally defined but not used [-Werror=unused-local-typedefs]
         QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
                  ^
    cc1: all warnings being treated as errors

(Caret diagnostics aren't perfect yet with macros... :)) Work around it
with __attribute__((unused)).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1364391272-1128-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 99835e0084)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 19:53:21 -05:00
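
The pattern triggering the warning is a compile-time assert implemented as a
typedef; a simplified standalone version (not QEMU's exact
QEMU_BUILD_BUG_ON macro) with the __attribute__((unused)) workaround looks
like this:

    /* The array has size -1, i.e. fails to compile, when cond is true. */
    #define GLUE_(a, b) a##b
    #define GLUE(a, b) GLUE_(a, b)
    #define BUILD_BUG_ON(cond) \
        typedef char GLUE(build_bug_on_line_, __LINE__)[(cond) ? -1 : 1] \
            __attribute__((unused))

    BUILD_BUG_ON(sizeof(int) < 2);      /* false condition: compiles fine */
    /* BUILD_BUG_ON(sizeof(int) > 2);      true on most hosts: would fail */

    int main(void)
    {
        return 0;
    }
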
Peter Lieven
2b92aa36d1 block: complete all IOs before resizing a device
This patch ensures that all pending IOs are completed
before a device is resized. This is especially important
if a device is shrunk, as the bdrv_check_request()
result is invalidated.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 92b7a08d64)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:36:43 -05:00
Peter Lieven
e4cce2d3e9 Revert "block: complete all IOs before .bdrv_truncate"
bdrv_truncate() is also called from readv/writev commands on self-
growing file-based storage. This will result in requests waiting
for themselves to complete.

This reverts commit 9a665b2b86.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 5c916681ae)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:35:43 -05:00
Gerd Hoffmann
d15b1aa30c qxl: better vga init in enter_vga_mode
Ask the vga core to update the display.  Will trigger dpy_gfx_resize
if needed.  More complete than just calling dpy_gfx_resize.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c099e7aa02)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:33:55 -05:00
Markus Armbruster
65fe29ec00 doc: Fix texinfo @table markup in qemu-options.hx
End tables before headings, start new ones afterwards.  Fixes
incorrect indentation of headings "File system options" and "Virtual
File system pass-through options" in manual page and qemu-doc.

Normalize markup some to increase chances it survives future edits.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1360781383-28635-5-git-send-email-armbru@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit c70a01e449)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:29:28 -05:00
Bruce Rogers
888e036eb4 acpi: initialize s4_val used in s4 shutdown
While investigating why a 32 bit Windows 2003 guest wasn't able to
successfully perform a shutdown /h, it was discovered that commit
afafe4bbe0 inadvertently dropped the
initialization of the s4_val used to handle s4 shutdown.
Initialize the value as before.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Message-id: 1364928100-487-1-git-send-email-brogers@suse.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 560e639652)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:24:55 -05:00
Petar Jovanovic
d019dd928c target-mips: fix rndrashift_short_acc and code for EXTR_ instructions
Fix rndrashift_short_acc to set the correct value in the higher 64 bits.
This change also corrects the conditions when bit 23 of the DSPControl register
is set.

The existing test files have been extended with several examples that
trigger the issues. One bug/example in the test file for EXTR_RS_W has been
found and reported by Klaus Peichl.

Signed-off-by: Petar Jovanovic <petar.jovanovic@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 8b758d0568)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:58:41 -05:00
Petar Jovanovic
dac077f0e6 target-mips: fix DSP overflow macro and affected routines
The previous implementation incorrectly used the same macro to detect overflow
for addition and subtraction. This patch makes distinction between these
two, and creates separate macros. The affected routines are changed
accordingly.

This change also includes additions to the existing tests for SUBQ_S_PH and
SUBQ_S_W that would trigger the fixed issue, and it removes dead code from
the test file. The last test case in subq_s_w.c is a bug found/reported/
isolated by Klaus Peichl from Dolby.

Signed-off-by: Petar Jovanovic <petar.jovanovic@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 20c334a797)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:32:39 -05:00
Petar Jovanovic
b09a673164 target-mips: fix for sign-issue in MULQ_W helper
Correct sign-propagation before multiplication in MULQ_W helper.
The change also fixes previously incorrect expected values in the
tests for MULQ_RS.W and MULQ_S.W.

Signed-off-by: Petar Jovanovic <petarj@mips.com>
Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit a345481baa)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:31:57 -05:00
Petar Jovanovic
79a4dd4085 target-mips: fix for incorrect multiplication with MULQ_S.PH
The change corrects sign-related issue with MULQ_S.PH. It also includes
extension to the already existing test which will trigger the issue.

Signed-off-by: Petar Jovanovic <petarj@mips.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 9c19eb1e20)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:31:23 -05:00
Hans de Goede
57e929c19c usb-tablet: Don't claim wakeup capability for USB-2 version
Our ehci code does not implement wakeup support, so claiming support for
it with usb-tablet in USB-2 mode causes all tablet events to get lost.

http://bugzilla.redhat.com/show_bug.cgi?id=929068

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit aa1c9e971e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:33:52 -05:00
Stefan Hajnoczi
27c71355fb chardev: clear O_NONBLOCK on SCM_RIGHTS file descriptors
When we receive a file descriptor over a UNIX domain socket the
O_NONBLOCK flag is preserved.  Clear the O_NONBLOCK flag and rely on
QEMU file descriptor users like migration, SPICE, VNC, block layer, and
others to set non-blocking only when necessary.

This change ensures we don't accidentally expose O_NONBLOCK in the QMP
API.  QMP clients should not need to get the non-blocking state
"correct".

A recent real-world example was when libvirt passed a non-blocking TCP
socket for migration where we expected a blocking socket.  The source
QEMU produced a corrupted migration stream since its code did not cope
with non-blocking sockets.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e374f7f816171f9783c1d9d00a041f26379f1ac6)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
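
The flag-clearing itself is a small fcntl() dance (a POSIX sketch, not
QEMU's qemu_set_block() helper):

    #include <fcntl.h>
    #include <unistd.h>

    /* Clear O_NONBLOCK so later users see a blocking descriptor. */
    static int clear_nonblock(int fd)
    {
        int flags = fcntl(fd, F_GETFL);
        if (flags < 0) {
            return -1;
        }
        return fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
    }

    int main(void)
    {
        return clear_nonblock(STDIN_FILENO) < 0 ? 1 : 0;
    }
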
Stefan Hajnoczi
283b7de6a5 qemu-socket: set passed fd non-blocking in socket_connect()
socket_connect() sets non-blocking on TCP or UNIX domain sockets if a
callback function is passed.  Do the same for file descriptor passing,
otherwise we could unexpectedly be using a blocking file descriptor.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 35fb94fa292173a3e1df0768433e06912a2a88e4)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
Stefan Hajnoczi
a1cb89f3fe net: ensure "socket" backend uses non-blocking fds
There are several code paths in net_init_socket() depending on how the
socket is created: file descriptor passing, UDP multicast, TCP, or UDP.
Some of these support both listen and connect.

Not all code paths set the socket to non-blocking.  This patch addresses
the file descriptor passing and UDP cases which were missing
socket_set_nonblock(fd) calls.

I considered moving socket_set_nonblock(fd) to a central location but it
turns out the code paths are different enough to require non-blocking at
different places.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f05b707279dc7c29ab10d9d13dbf413df6ec22f1)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
Stefan Hajnoczi
68f9df5990 oslib-posix: rename socket_set_nonblock() to qemu_set_nonblock()
The fcntl(fd, F_SETFL, O_NONBLOCK) flag is not specific to sockets.
Rename to qemu_set_nonblock() just like qemu_set_cloexec().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 399f1c8f8af1f6f8b18ef4e37169c6301264e467)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Conflicts:
	block/sheepdog.c

socket_set_block()/socket_set_nonblock() calls in different locations

	include/qemu/sockets.h

socket_set_nodelay() does not exist in v1.4.0, messes up diff context

	qemu-char.c

glib G_IO_IN events are not used in v1.4.0, messes up diff context

	savevm.c

qemu_fopen_socket() only has read mode in v1.4.0, qemu_set_block() not
necessary.

	slirp/misc.c

unportable setsockopt() calls in v1.4.0 mess up diff context

	slirp/tcp_subr.c

file was reformatted, diff context is messed up

	ui/vnc.c

old dcl->idle instead of vd->dcl.idle messes up diff context

Added:
	migration-tcp.c, migration-unix.c

qemu_fopen_socket() write mode does not exist yet, qemu_set_block() call
is needed here.
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
Gerd Hoffmann
0135796271 update seabios to 1.7.2.1
Alex Williamson (3):
      seabios q35: Enable all PIRQn IRQs at startup
      seabios q35: Add new PCI slot to irq routing function
      seabios: Add a dummy PCI slot to irq mapping function

Avik Sil (1):
      USB-EHCI: Fix null pointer assignment

Kevin O'Connor (4):
      Update tools/acpi_extract.py to handle iasl 20130117 release.
      Fix Makefile - don't reference "out/" directly, instead use "$(OUT)".
      build: Don't require $(OUT) to be a sub-directory of the main
directory.
      Verify CC is valid during build tests.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 5c75fb1002)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 16:34:06 -05:00
Peter Maydell
799a34a48b linux-user/syscall.c: Don't warn about unimplemented get_robust_list
The nature of the kernel ABI for the get_robust_list and set_robust_list
syscalls means we cannot implement them in QEMU. Make get_robust_list
silently return ENOSYS rather than using the default "print message and
then fail ENOSYS" code path, in the same way we already do for
set_robust_list, and add a comment documenting why we do this.

This silences warnings which were being produced for emulating
even trivial programs like 'ls' in x86-64-on-x86-64.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit e9a970a831)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 16:28:53 -05:00
Peter Maydell
8378910554 linux-user: make bogus negative iovec lengths fail EINVAL
If the guest passes us a bogus negative length for an iovec, fail
EINVAL rather than proceeding blindly forward. This fixes some of
the error cases tests for readv and writev in the LTP.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit dfae8e00f8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 16:23:52 -05:00
John Rigby
7a238b9fbd linux-user: fix futex strace of FUTEX_CLOCK_REALTIME
Handle same as existing FUTEX_PRIVATE_FLAG.

Signed-off-by: John Rigby <john.rigby@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit bfb669f39f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:49:19 -05:00
John Rigby
02493ee490 linux-user/syscall.c: handle FUTEX_WAIT_BITSET in do_futex
Upstream libc has recently changed to start using
FUTEX_WAIT_BITSET instead of FUTEX_WAIT and this
is causing do_futex to return -TARGET_ENOSYS.

Pass bitset in val3 to sys_futex which will be
ignored by kernel for the FUTEX_WAIT case.

Signed-off-by: John Rigby <john.rigby@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit cce246e0a2)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:48:35 -05:00
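
The call shape being discussed can be sketched with a raw Linux syscall
(illustrative only, not QEMU's do_futex(); the futex word and expected value
are arbitrary). FUTEX_WAIT_BITSET takes the bitset in the final val3
argument, which a plain FUTEX_WAIT simply ignores:

    #define _GNU_SOURCE
    #include <linux/futex.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        uint32_t word = 1;

        /* Expected value 0 != 1, so this returns EAGAIN immediately
         * instead of sleeping. */
        long r = syscall(SYS_futex, &word, FUTEX_WAIT_BITSET, 0,
                         NULL, NULL, FUTEX_BITSET_MATCH_ANY);
        if (r < 0) {
            perror("futex");
        }
        return 0;
    }
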
Stefan Hajnoczi
7d47b243d6 qcow2: flush refcount cache correctly in qcow2_write_snapshots()
Since qcow2 metadata is cached we need to flush the caches, not just the
underlying file.  Use bdrv_flush(bs) instead of bdrv_flush(bs->file).

Also add the error return path when bdrv_flush() fails and move the
flush after checking for qcow2_alloc_clusters() failure so that the
qcow2_alloc_clusters() error return value takes precedence.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit f6977f1556)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:47:09 -05:00
Stefan Hajnoczi
02ea844746 qcow2: flush refcount cache correctly in alloc_refcount_block()
update_refcount() affects the refcount cache, it does not write to disk.
Therefore bdrv_flush(bs->file) does nothing.  We need to flush the
refcount cache in order to write out the refcount updates!

While we're here also add error returns when qcow2_cache_flush() fails.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9991923b26)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:45:40 -05:00
Peter Lieven
0fcf00b55c page_cache: fix memory leak
XBZRLE-encoded migration introduced an MRU page cache
mechanism. Unfortunately, cached items were never freed in
case of a collision in the page cache on cache_insert().

This led to out-of-memory conditions during XBZRLE migration
if the page cache was small and there were a lot of collisions
in the cache.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Orit Wasserman <owasserm@redhat.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 32a1c08b60)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:44:43 -05:00
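
The leak pattern is generic to any direct-mapped cache; a tiny illustrative
version (not QEMU's page_cache implementation; slot count and page size are
arbitrary) shows why the evicted copy must be freed on collision:

    #include <stdlib.h>
    #include <string.h>

    #define CACHE_SLOTS 1024
    #define PAGE_SIZE   4096

    typedef struct {
        unsigned long addr;
        unsigned char *data;
    } CacheItem;

    static CacheItem cache[CACHE_SLOTS];

    static void cache_insert(unsigned long addr, const unsigned char *page)
    {
        CacheItem *it = &cache[addr % CACHE_SLOTS];

        free(it->data);             /* without this, collisions leak */
        it->data = malloc(PAGE_SIZE);
        if (!it->data) {
            return;
        }
        memcpy(it->data, page, PAGE_SIZE);
        it->addr = addr;
    }

    int main(void)
    {
        unsigned char page[PAGE_SIZE] = { 0 };
        cache_insert(1, page);
        cache_insert(1 + CACHE_SLOTS, page);    /* collides with the first */
        return 0;
    }
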
Orit Wasserman
5610ef5863 Fix page_cache leak in cache_resize
Signed-off-by: Orit Wasserman <owasserm@redhat.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 0db65d624e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:44:02 -05:00
Christian Borntraeger
7a687aed28 virtio-blk: fix unplug + virsh reboot
virtio-blk registers a vmstate change handler. Unfortunately this
handler is not unregistered on unplug, leading to some random
crashes if the system is restarted, e.g. via virsh reboot.
Lets unregister the vmstate change handler if the device is removed.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 69b302b204)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:41:50 -05:00
Mark Cave-Ayland
b91aee5810 ide/macio: Fix macio DMA initialisation.
Commit 07a7484e5d accidentally introduced a bug
in the initialisation of the second macio DMA device which could cause some
DMA operations to segfault QEMU.

CC: Andreas Färber <afaerber@suse.de>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 02d583c723)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:39:59 -05:00
Andreas Färber
e09b99b54f target-ppc: Fix CPU_POWERPC_MPC8547E
It was defined to ..._MPC8545E_v21 rather than ..._MPC8547E_v21.
Due to both resolving to CPU_POWERPC_e500v2_v21 this did not show.

Fixing this nontheless helps with QOM'ifying CPU aliases.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 0136d715ad)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:53:18 -05:00
David Gibson
611c7f2c3a pseries: Add cleanup hook for PAPR virtual LAN device
Currently the spapr-vlan device does not supply a cleanup call for its
NetClientInfo structure.  With current qemu versions, that leads to a SEGV
on exit, when net_cleanup() attempts to call the cleanup handlers on all
net clients.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 156dfaded8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:51:39 -05:00
Michal Privoznik
4e4566ce78 configure: Require at least spice-protocol-0.12.3
As of 5a49d3e9 we assume SPICE_PORT_EVENT_BREAK to be defined.
However, it is defined not in 0.12.2, which we require now, but in
0.12.3.  Therefore, in order to prevent a build failure, we must
adjust our minimal requirements.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 358689fe29)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:48:51 -05:00
Paolo Bonzini
43e00611bc qemu-bridge-helper: force usage of a very high MAC address for the bridge
Linux uses the lowest enslaved MAC address as the MAC address of
the bridge.  Set MAC address to a high value so that it does not
affect the MAC address of the bridge.

Changing the MAC address of the bridge could cause a few seconds
of network downtime.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1363971468-21154-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 226ecabfbd)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:31:58 -05:00
Cornelia Huck
3c3de7c6b4 virtio-ccw: Queue sanity check for notify hypercall.
Verify that the virtio-ccw notify hypercall passed a reasonable
value for queue.

Cc: qemu-stable@nongnu.org
Reported-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit b57ed9bf07)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:30:51 -05:00
Yeongkyoon Lee
b0da310a69 tcg: Fix occasional TCG broken problem when ldst optimization enabled
is_tcg_gen_code() checks the upper limit of the TCG-generated code range
incorrectly, so TCG could occasionally break, but only when
CONFIG_QEMU_LDST_OPTIMIZATION is enabled. The reason is that
code_gen_buffer_max_size does not cover the upper range up to
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE); thus code_gen_buffer_max_size should be
changed to code_gen_buffer_size.

CC: qemu-stable@nongnu.org
Signed-off-by: Yeongkyoon Lee <yeongkyoon.lee@samsung.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 52ae646d4a)

Conflicts:

	translate-all.c

*modified to use non-tcg-ctx version of code_gen_* variables

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:28:39 -05:00
Peter Crosthwaite
d26efd2d39 qga/main.c: Don't use g_key_file_get/set_int64
These functions don't exist until glib version 2.26. QEMU is currently only
mandating glib 2.12.

This patch replaces the functions with g_key_file_get/set_integer.

Unbreaks the build on Ubuntu 10.04 and RHEL 5.6.

Regression was introduced by 39097daf15

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 1363323879-682-1-git-send-email-peter.crosthwaite@xilinx.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 4f30649618)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:18:47 -05:00
Michael Roth
f305d504ab qemu-ga: use key-value store to avoid recycling fd handles after restart
Hosts hold on to handles provided by guest-file-open for periods that can
span beyond the life of the qemu-ga process that issued them. Since these
are issued starting from 0 on every restart, we run the risk of issuing
duplicate handles after restarts/reboots.

As a result, users with a stale copy of these handles may end up
reading/writing corrupted data due to their existing handles effectively
being re-assigned to an unexpected file or offset.

We unfortunately do not issue handles as strings, but as integers, so a
solution such as using UUIDs can't be implemented without introducing a
new interface.

As a workaround, we fix this by implementing a persistent key-value store
that will be used to track the value of the last handle that was issued
across restarts/reboots to avoid issuing duplicates.

The store is automatically written to the same directory we currently
set via --statedir to track fsfreeze state, and so should be applicable
for stable releases where this flag is supported.

A follow-up can use this same store for handling fsfreeze state, but
that change is cosmetic and left out for now.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org

* fixed guest_file_handle_add() return value from uint64_t to int64_t
(cherry picked from commit 39097daf15)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:16:31 -05:00
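
The persistence idea boils down to "remember the last issued handle across
restarts"; a rough standalone sketch (not qemu-ga's key-value store, and the
state path below is made up) could look like:

    #include <stdio.h>

    static long long next_handle(const char *state_path)
    {
        long long last = 0;

        FILE *f = fopen(state_path, "r");
        if (f) {
            if (fscanf(f, "%lld", &last) != 1) {
                last = 0;
            }
            fclose(f);
        }

        long long handle = last + 1;
        f = fopen(state_path, "w");
        if (f) {
            fprintf(f, "%lld\n", handle);
            fclose(f);
        }
        return handle;
    }

    int main(void)
    {
        printf("issued handle %lld\n", next_handle("/tmp/ga-last-handle"));
        return 0;
    }
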
Paolo Bonzini
d3652a1b28 qcow2: make is_allocated return true for zero clusters
Otherwise, live migration of the top layer will miss zero clusters and
let the backing file show through.  This also matches what is done in qed.

QCOW2_CLUSTER_ZERO clusters are invalid in v2 image files.  Check this
directly in qcow2_get_cluster_offset instead of replicating the test
everywhere.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 381b487d54)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:00:19 -05:00
David Gibson
51943504d5 pseries: Add compatible property to root of device tree
Currently, for the pseries machine the device tree supplied by qemu to SLOF
and from there to the guest does not include a 'compatible property' at the
root level.  Usually that works fine, since in this case the compatible
property doesn't really give any information not already found in the
'device_type' or 'model' properties.

However, the lack of 'compatible' confuses the bootloader install in the
SLES11 SP2 and SLES11 SP3 installers.  This patch therefore adds a token
'compatible' property to work around that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Cc: qemu-stable@nongnu.org
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit d63919c93e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:59:03 -05:00
Christian Borntraeger
4d1cdb9efd Allow virtio-net features for legacy s390 virtio bus
Enable all virtio-net features for the legacy s390 virtio bus. This also fixes
kernel BUG at /usr/src/packages/BUILD/kernel-default-3.0.58/linux-3.0/drivers/s390/kvm/kvm_virtio.c:121!

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 35569cea79)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:57:53 -05:00
Cole Robinson
c3b81e01b8 rtc-test: Fix test failures with recent glib
As of glib 2.35.4, glib changed its logic for ordering test cases:

https://bugzilla.gnome.org/show_bug.cgi?id=694487

This was causing failures in rtc-test. Group the reordered test
cases into their own suite, which maintains the original ordering.

CC: qemu-stable@nongnu.org
Signed-off-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit eeb29fb9aa)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:56:19 -05:00
Paolo Bonzini
99b1f39bd2 scsi-disk: do not complete canceled UNMAP requests
Canceled requests should never be completed, and doing that could cause
accesses to a NULL hba_private field.

Cc: qemu-stable@nongnu.org
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d0242eadc5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:54:35 -05:00
Paolo Bonzini
f23ab037c7 scsi: do not call scsi_read_data/scsi_write_data for a canceled request
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6f6710aa99)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:53:33 -05:00
Paolo Bonzini
0c918dd600 iscsi: look for pkg-config file too
Due to library conflicts, Fedora will have to put libiscsi in
/usr/lib/iscsi.  Simplify configuration by using a pkg-config
file.  The Fedora package will distribute one, and the patch
to add it has been sent to upstream libiscsi as well.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3c33ea9640)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:52:33 -05:00
Paolo Bonzini
a8b090ef08 scsi-disk: handle io_canceled uniformly and correctly
Always check it immediately after calling bdrv_acct_done, and
always do a "goto done" in case the "done" label has to free
some memory---as is the case for scsi_unmap_complete in the
previous patch.

This patch could fix problems that happen when a request is
split into multiple parts, and one of them is canceled.  Then
the next part is fired, but the HBA's cancellation callbacks have
fired already.  Whether this happens or not depends on how the
block/ driver implements AIO cancellation.  If it does a simple
bdrv_drain_all() or similar, then it will not have a problem.
If it only cancels the given AIOCB, this scenario could happen.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0c92e0e6b6)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:50:28 -05:00
Michael Roth
4a38944326 qemu-ga: make guest-sync-delimited available during fsfreeze
We currently maintain a whitelist of commands that are safe during
fsfreeze. During fsfreeze, we disable all commands that aren't part of
that whitelist.

guest-sync-delimited meets the criteria for being whitelisted, and is
also required for qemu-ga clients that rely on guest-sync-delimited for
re-syncing the channel after a timeout.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit c5dcb6ae23)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:49:47 -05:00
Markus Armbruster
b7ff1a7a00 qmp: netdev_add is like -netdev, not -net, fix documentation
Cc: qemu-stable@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit af347aa5a5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:43:46 -05:00
Gerd Hoffmann
d49fed4c55 vga: fix byteswapping.
In case host and guest endianness differ the vga code first creates
a shared surface (using qemu_create_displaysurface_from), then goes
patch the surface format to indicate that the bytes must be swapped.

The switch to pixman broke that hack as the format patching isn't
propagated into the pixman image, so ui code using the pixman image
directly (such as vnc) uses the wrong format.

Fix that by adding a byteswap parameter to
qemu_create_displaysurface_from, so we'll use the correct format
when creating the surface (and the pixman image) and don't have
to patch the format afterwards.

[ v2: unbreak xen build ]

Cc: qemu-stable@nongnu.org
Cc: mark.cave-ayland@ilande.co.uk
Cc: agraf@suse.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1361349432-23884-1-git-send-email-kraxel@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit b1424e0381)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:34:41 -05:00
Jason Wang
cebb8ebe41 help: add docs for multiqueue tap options
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-id: 1361354641-51969-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 2ca81baa0b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:32:37 -05:00
Jason Wang
3b39a11cde net: reduce the unnecessary memory allocation of multiqueue
Edivaldo reports a problem that the array of NetClientState in NICState is too
large - MAX_QUEUE_NUM(1024) - which wastes memory even if multiqueue is not
used.

Instead of static arrays, solve this issue by allocating the queues on demand
for both the NetClientState array in NICState and VirtIONetQueue array in
VirtIONet.

Tested by myself, with single virtio-net-pci device. The memory allocation is
almost the same as when multiqueue is not merged.

Cc: Edivaldo de Araujo Pereira <edivaldoapereira@yahoo.com.br>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f6b26cf257)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:28:29 -05:00
Igor Mitsyanko
ec9f828341 qemu-char.c: fix waiting for telnet connection message
Current colon position in "waiting for telnet connection" message template
produces messages like:
QEMU waiting for connection on: telnet::127.0.0.16666,server

After moving a colon to the right, we will get a correct messages like:
QEMU waiting for connection on: telnet:127.0.0.1:6666,server

Signed-off-by: Igor Mitsyanko <i.mitsyanko@gmail.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit e5545854dd)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:25:05 -05:00
Jason Wang
332e93417a tap: forbid creating multiqueue tap when hub is used
Obviously, the hub does not support multiqueue tap. So this patch forbids creating
a multiqueue tap when a hub is used, to prevent the crash when a command line such
as "-net tap,queues=2" is used.

Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ce675a7579)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:07:24 -05:00
Peter Lieven
e6b795f34e block: complete all IOs before .bdrv_truncate
bdrv_truncate() invalidates the bdrv_check_request() result for
in-flight requests, so there should better be none.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Reported-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9a665b2b86)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:05:31 -05:00
Paolo Bonzini
51968b8503 coroutine: trim down nesting level in perf_nesting test
20000 nested coroutines require 20 GB of virtual address space.
Only nest 1000 of them so that the test (only enabled with
"-m perf" on the command line) runs on 32-bit machines too.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 027003152f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:04:32 -05:00
Andreas Färber
80d8b5da48 target-ppc: Fix "G2leGP3" PVR
Unlike derived PVR constants mapped to CPU_POWERPC_G2LEgp3, the
"G2leGP3" model definition itself used the CPU_POWERPC_G2LEgp1 PVR.

Fixing this will allow to alias CPU_POWERPC_G2LEgp3-using types to
"G2leGP3".

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit bfe6d5b0da)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 09:52:13 -05:00
196 changed files with 6303 additions and 1275 deletions

2
.gitignore vendored
View File

@@ -92,6 +92,8 @@ pc-bios/optionrom/multiboot.img
pc-bios/optionrom/kvmvapic.bin
pc-bios/optionrom/kvmvapic.raw
pc-bios/optionrom/kvmvapic.img
pc-bios/s390-ccw/s390-ccw.elf
pc-bios/s390-ccw/s390-ccw.img
.stgit-*
cscope.*
tags

View File

@@ -16,16 +16,7 @@ block-obj-y += qapi-types.o qapi-visit.o
block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
block-obj-y += qemu-coroutine-sleep.o
ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
block-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
else
ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y)
block-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o
else
block-obj-$(CONFIG_POSIX) += coroutine-gthread.o
endif
endif
block-obj-$(CONFIG_WIN32) += coroutine-win32.o
block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.

View File

@@ -1 +1 @@
1.4.0
1.4.2

View File

@@ -114,26 +114,6 @@ const uint32_t arch_type = QEMU_ARCH;
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE 0x40
#ifdef __ALTIVEC__
#include <altivec.h>
#define VECTYPE vector unsigned char
#define SPLAT(p) vec_splat(vec_ld(0, p), 0)
#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
/* altivec.h may redefine the bool macro as vector type.
* Reset it to POSIX semantics. */
#undef bool
#define bool _Bool
#elif defined __SSE2__
#include <emmintrin.h>
#define VECTYPE __m128i
#define SPLAT(p) _mm_set1_epi8(*(p))
#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
#else
#define VECTYPE unsigned long
#define SPLAT(p) (*(p) * (~0UL / 255))
#define ALL_EQ(v1, v2) ((v1) == (v2))
#endif
static struct defconfig_file {
const char *filename;
@@ -164,19 +144,10 @@ int qemu_read_default_config_files(bool userconfig)
return 0;
}
static int is_dup_page(uint8_t *page)
static inline bool is_zero_page(uint8_t *p)
{
VECTYPE *p = (VECTYPE *)page;
VECTYPE val = SPLAT(page);
int i;
for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
if (!ALL_EQ(val, p[i])) {
return 0;
}
}
return 1;
return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
TARGET_PAGE_SIZE;
}
/* struct contains XBZRLE cache and a static page
@@ -210,6 +181,7 @@ int64_t xbzrle_cache_resize(int64_t new_size)
/* accounting for migration statistics */
typedef struct AccountingInfo {
uint64_t dup_pages;
uint64_t skipped_pages;
uint64_t norm_pages;
uint64_t iterations;
uint64_t xbzrle_bytes;
@@ -235,6 +207,16 @@ uint64_t dup_mig_pages_transferred(void)
return acct_info.dup_pages;
}
uint64_t skipped_mig_bytes_transferred(void)
{
return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}
uint64_t skipped_mig_pages_transferred(void)
{
return acct_info.skipped_pages;
}
uint64_t norm_mig_bytes_transferred(void)
{
return acct_info.norm_pages * TARGET_PAGE_SIZE;
@@ -347,6 +329,7 @@ static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
@@ -356,7 +339,13 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
unsigned long next = find_next_bit(migration_bitmap, size, nr);
unsigned long next;
if (ram_bulk_stage && nr > base) {
next = nr + 1;
} else {
next = find_next_bit(migration_bitmap, size, nr);
}
if (next < size) {
clear_bit(next, migration_bitmap);
@@ -451,6 +440,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
if (!block) {
block = QTAILQ_FIRST(&ram_list.blocks);
complete_round = true;
ram_bulk_stage = false;
}
} else {
uint8_t *p;
@@ -461,13 +451,13 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* In doubt sent page as normal */
bytes_sent = -1;
if (is_dup_page(p)) {
if (is_zero_page(p)) {
acct_info.dup_pages++;
bytes_sent = save_block_hdr(f, block, offset, cont,
RAM_SAVE_FLAG_COMPRESS);
qemu_put_byte(f, *p);
bytes_sent += 1;
} else if (migrate_use_xbzrle()) {
qemu_put_byte(f, 0);
bytes_sent++;
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
offset, cont, last_stage);
@@ -554,6 +544,7 @@ static void reset_ram_globals(void)
last_sent_block = NULL;
last_offset = 0;
last_version = ram_list.version;
ram_bulk_stage = true;
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -745,7 +736,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
uint8_t len;
if (flags & RAM_SAVE_FLAG_CONTINUE) {
if (!block) {
if (!block || block->length <= offset) {
fprintf(stderr, "Ack, bad migration stream!\n");
return NULL;
}
@@ -758,8 +749,9 @@ static inline void *host_from_stream_offset(QEMUFile *f,
id[len] = 0;
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id)))
if (!strncmp(id, block->idstr, sizeof(id)) && block->length > offset) {
return memory_region_get_ram_ptr(block->mr) + offset;
}
}
fprintf(stderr, "Can't find block %s!\n", id);
@@ -833,14 +825,16 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
}
ch = qemu_get_byte(f);
memset(host, ch, TARGET_PAGE_SIZE);
if (ch != 0 || !is_zero_page(host)) {
memset(host, ch, TARGET_PAGE_SIZE);
#ifndef _WIN32
if (ch == 0 &&
(!kvm_enabled() || kvm_has_sync_mmu()) &&
getpagesize() <= TARGET_PAGE_SIZE) {
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
}
if (ch == 0 &&
(!kvm_enabled() || kvm_has_sync_mmu()) &&
getpagesize() <= TARGET_PAGE_SIZE) {
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
}
#endif
}
} else if (flags & RAM_SAVE_FLAG_PAGE) {
void *host;
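The arch_init.c hunks above replace the old per-page duplicate scan with is_zero_page(), built on buffer_find_nonzero_offset(), and send an all-zero page as just a block header plus one zero byte. Below is a scalar sketch of what that zero test amounts to; the page-size constant and names are illustrative, not QEMU's.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define SKETCH_PAGE_SIZE 4096              /* stand-in for TARGET_PAGE_SIZE */

/* Return true if every byte of the page is zero, scanning one
 * unsigned long at a time (the portable fallback of the removed code,
 * specialised to the all-zero case). */
static bool page_is_zero(const void *page)
{
    const unsigned long *p = page;
    size_t i;

    for (i = 0; i < SKETCH_PAGE_SIZE / sizeof(unsigned long); i++) {
        if (p[i] != 0) {
            return false;
        }
    }
    return true;
}

int main(void)
{
    static unsigned long page[SKETCH_PAGE_SIZE / sizeof(unsigned long)];

    printf("zero page: %d\n", page_is_zero(page));     /* 1 */
    memset(page, 0xff, 1);
    printf("zero page: %d\n", page_is_zero(page));     /* 0 */
    return 0;
}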


@@ -41,6 +41,9 @@ static void entropy_available(void *opaque)
ssize_t len;
len = read(s->fd, buffer, s->size);
if (len < 0 && errno == EAGAIN) {
return;
}
g_assert(len != -1);
s->receive_func(s->opaque, buffer, len);
@@ -74,7 +77,7 @@ static void rng_random_opened(RngBackend *b, Error **errp)
error_set(errp, QERR_INVALID_PARAMETER_VALUE,
"filename", "a valid filename");
} else {
s->fd = open(s->filename, O_RDONLY | O_NONBLOCK);
s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
if (s->fd == -1) {
error_set(errp, QERR_OPEN_FILE_FAILED, s->filename);
@@ -130,7 +133,7 @@ static void rng_random_finalize(Object *obj)
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
if (s->fd != -1) {
close(s->fd);
qemu_close(s->fd);
}
g_free(s->filename);


@@ -1940,6 +1940,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
return -EIO;
}
return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE);
}


@@ -18,5 +18,7 @@ endif
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += mirror.o
block-obj-y += dictzip.o
block-obj-y += tar.o
$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)


@@ -38,57 +38,42 @@
// not allocated: 0xffffffff
// always little-endian
struct bochs_header_v1 {
char magic[32]; // "Bochs Virtual HD Image"
char type[16]; // "Redolog"
char subtype[16]; // "Undoable" / "Volatile" / "Growing"
uint32_t version;
uint32_t header; // size of header
union {
struct {
uint32_t catalog; // num of entries
uint32_t bitmap; // bitmap size
uint32_t extent; // extent size
uint64_t disk; // disk size
char padding[HEADER_SIZE - 64 - 8 - 20];
} redolog;
char padding[HEADER_SIZE - 64 - 8];
} extra;
};
// always little-endian
struct bochs_header {
char magic[32]; // "Bochs Virtual HD Image"
char type[16]; // "Redolog"
char subtype[16]; // "Undoable" / "Volatile" / "Growing"
char magic[32]; /* "Bochs Virtual HD Image" */
char type[16]; /* "Redolog" */
char subtype[16]; /* "Undoable" / "Volatile" / "Growing" */
uint32_t version;
uint32_t header; // size of header
uint32_t header; /* size of header */
uint32_t catalog; /* num of entries */
uint32_t bitmap; /* bitmap size */
uint32_t extent; /* extent size */
union {
struct {
uint32_t catalog; // num of entries
uint32_t bitmap; // bitmap size
uint32_t extent; // extent size
uint32_t reserved; // for ???
uint64_t disk; // disk size
char padding[HEADER_SIZE - 64 - 8 - 24];
} redolog;
char padding[HEADER_SIZE - 64 - 8];
struct {
uint32_t reserved; /* for ??? */
uint64_t disk; /* disk size */
char padding[HEADER_SIZE - 64 - 20 - 12];
} QEMU_PACKED redolog;
struct {
uint64_t disk; /* disk size */
char padding[HEADER_SIZE - 64 - 20 - 8];
} QEMU_PACKED redolog_v1;
char padding[HEADER_SIZE - 64 - 20];
} extra;
};
} QEMU_PACKED;
typedef struct BDRVBochsState {
CoMutex lock;
uint32_t *catalog_bitmap;
int catalog_size;
uint32_t catalog_size;
int data_offset;
uint32_t data_offset;
int bitmap_blocks;
int extent_blocks;
int extent_size;
uint32_t bitmap_blocks;
uint32_t extent_blocks;
uint32_t extent_size;
} BDRVBochsState;
static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -111,9 +96,8 @@ static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
static int bochs_open(BlockDriverState *bs, int flags)
{
BDRVBochsState *s = bs->opaque;
int i;
uint32_t i;
struct bochs_header bochs;
struct bochs_header_v1 header_v1;
int ret;
bs->read_only = 1; // no write support yet
@@ -132,13 +116,20 @@ static int bochs_open(BlockDriverState *bs, int flags)
}
if (le32_to_cpu(bochs.version) == HEADER_V1) {
memcpy(&header_v1, &bochs, sizeof(bochs));
bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512;
bs->total_sectors = le64_to_cpu(bochs.extra.redolog_v1.disk) / 512;
} else {
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
}
/* Limit to 1M entries to avoid unbounded allocation. This is what is
* needed for the largest image that bximage can create (~8 TB). */
s->catalog_size = le32_to_cpu(bochs.catalog);
if (s->catalog_size > 0x100000) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Catalog size is too large");
return -EFBIG;
}
s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
@@ -152,10 +143,27 @@ static int bochs_open(BlockDriverState *bs, int flags)
s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4);
s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512;
s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512;
s->bitmap_blocks = 1 + (le32_to_cpu(bochs.bitmap) - 1) / 512;
s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512;
s->extent_size = le32_to_cpu(bochs.extra.redolog.extent);
s->extent_size = le32_to_cpu(bochs.extent);
if (s->extent_size == 0) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Extent size may not be zero");
return -EINVAL;
} else if (s->extent_size > 0x800000) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Extent size %" PRIu32 " is too large",
s->extent_size);
return -EINVAL;
}
if (s->catalog_size < bs->total_sectors / s->extent_size) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Catalog size is too small for this disk size");
ret = -EINVAL;
goto fail;
}
qemu_co_mutex_init(&s->lock);
return 0;
@@ -168,8 +176,8 @@ fail:
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
int64_t offset = sector_num * 512;
int64_t extent_index, extent_offset, bitmap_offset;
uint64_t offset = sector_num * 512;
uint64_t extent_index, extent_offset, bitmap_offset;
char bitmap_entry;
// seek to sector
@@ -180,8 +188,9 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
return -1; /* not allocated */
}
bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
bitmap_offset = s->data_offset +
(512 * (uint64_t) s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),

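For context on the new bochs limits: the 0x100000-entry catalog cap and the 0x800000-byte extent cap multiply out to 2^20 * 2^23 bytes = 8 TiB, which lines up with the "largest image that bximage can create (~8 TB)" mentioned in the comment above. A standalone sketch of geometry validation in the same spirit, with assumed field names and byte-based units rather than the driver's exact expressions:

#include <stdint.h>
#include <stdio.h>

#define MAX_CATALOG_ENTRIES 0x100000u      /* 1 Mi entries, as in the hunk */
#define MAX_EXTENT_BYTES    0x800000u      /* 8 MiB per extent, as in the hunk */

/* Return 0 if the header geometry is plausible, -1 otherwise. */
static int check_geometry(uint32_t catalog_entries, uint32_t extent_bytes,
                          uint64_t disk_bytes)
{
    if (catalog_entries > MAX_CATALOG_ENTRIES) {
        return -1;                         /* unbounded catalog allocation */
    }
    if (extent_bytes == 0 || extent_bytes > MAX_EXTENT_BYTES) {
        return -1;                         /* zero or oversized extent */
    }
    /* One catalog entry is needed per extent of the disk. */
    if ((uint64_t)catalog_entries * extent_bytes < disk_bytes) {
        return -1;                         /* catalog too small for the disk */
    }
    return 0;
}

int main(void)
{
    uint64_t eight_tib = (uint64_t)MAX_CATALOG_ENTRIES * MAX_EXTENT_BYTES;

    printf("max-size image:    %d\n",
           check_geometry(MAX_CATALOG_ENTRIES, MAX_EXTENT_BYTES, eight_tib));
    printf("catalog too small: %d\n", check_geometry(16, 512, eight_tib));
    return 0;
}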

@@ -26,6 +26,9 @@
#include "qemu/module.h"
#include <zlib.h>
/* Maximum compressed block size */
#define MAX_BLOCK_SIZE (64 * 1024 * 1024)
typedef struct BDRVCloopState {
CoMutex lock;
uint32_t block_size;
@@ -67,6 +70,29 @@ static int cloop_open(BlockDriverState *bs, int flags)
return ret;
}
s->block_size = be32_to_cpu(s->block_size);
if (s->block_size % 512) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"block_size %u must be a multiple of 512",
s->block_size);
return -EINVAL;
}
if (s->block_size == 0) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"block_size cannot be zero");
return -EINVAL;
}
/* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but
* we can accept more. Prevent ridiculous values like 4 GB - 1 since we
* need a buffer this big.
*/
if (s->block_size > MAX_BLOCK_SIZE) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"block_size %u must be %u MB or less",
s->block_size,
MAX_BLOCK_SIZE / (1024 * 1024));
return -EINVAL;
}
ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
@@ -75,7 +101,25 @@ static int cloop_open(BlockDriverState *bs, int flags)
s->n_blocks = be32_to_cpu(s->n_blocks);
/* read offsets */
offsets_size = s->n_blocks * sizeof(uint64_t);
if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
/* Prevent integer overflow */
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"n_blocks %u must be %zu or less",
s->n_blocks,
(UINT32_MAX - 1) / sizeof(uint64_t));
return -EINVAL;
}
offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
if (offsets_size > 512 * 1024 * 1024) {
/* Prevent ridiculous offsets_size which causes memory allocation to
* fail or overflows bdrv_pread() size. In practice the 512 MB
* offsets[] limit supports 16 TB images at 256 KB block size.
*/
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"image requires too many offsets, "
"try increasing block size");
return -EINVAL;
}
s->offsets = g_malloc(offsets_size);
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
@@ -83,13 +127,39 @@ static int cloop_open(BlockDriverState *bs, int flags)
goto fail;
}
for(i=0;i<s->n_blocks;i++) {
for (i = 0; i < s->n_blocks + 1; i++) {
uint64_t size;
s->offsets[i] = be64_to_cpu(s->offsets[i]);
if (i > 0) {
uint32_t size = s->offsets[i] - s->offsets[i - 1];
if (size > max_compressed_block_size) {
max_compressed_block_size = size;
}
if (i == 0) {
continue;
}
if (s->offsets[i] < s->offsets[i - 1]) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"offsets not monotonically increasing at "
"index %u, image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
size = s->offsets[i] - s->offsets[i - 1];
/* Compressed blocks should be smaller than the uncompressed block size
* but maybe compression performed poorly so the compressed block is
* actually bigger. Clamp down on unrealistic values to prevent
* ridiculous s->compressed_block allocation.
*/
if (size > 2 * MAX_BLOCK_SIZE) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"invalid compressed block size at index %u, "
"image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
if (size > max_compressed_block_size) {
max_compressed_block_size = size;
}
}
@@ -179,9 +249,7 @@ static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
static void cloop_close(BlockDriverState *bs)
{
BDRVCloopState *s = bs->opaque;
if (s->n_blocks > 0) {
g_free(s->offsets);
}
g_free(s->offsets);
g_free(s->compressed_block);
g_free(s->uncompressed_block);
inflateEnd(&s->zstream);
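The new cloop limits keep the offsets[] table itself bounded: 512 MiB of offsets at 8 bytes each is 64 Mi block offsets, and at the 256 KiB block size mentioned in the comment that covers roughly 64 Mi * 256 KiB = 16 TiB of image data. A minimal sketch of the same overflow-aware header validation follows; MAX_OFFSETS_SIZE is a made-up name for the 512 MiB literal in the hunk.

#include <stdint.h>
#include <stdio.h>

#define MAX_BLOCK_SIZE   (64u * 1024 * 1024)    /* 64 MiB, as in the hunk */
#define MAX_OFFSETS_SIZE (512u * 1024 * 1024)   /* cap on the offsets[] table */

/* Validate the header fields before computing (n_blocks + 1) * 8.
 * Returns the offsets[] size in bytes, or 0 if the header is unacceptable. */
static uint64_t offsets_table_size(uint32_t n_blocks, uint32_t block_size)
{
    uint64_t offsets_size;

    if (block_size == 0 || block_size % 512 || block_size > MAX_BLOCK_SIZE) {
        return 0;                          /* bogus block size */
    }
    if (n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
        return 0;                          /* (n_blocks + 1) * 8 would overflow */
    }
    offsets_size = (uint64_t)(n_blocks + 1) * sizeof(uint64_t);
    if (offsets_size > MAX_OFFSETS_SIZE) {
        return 0;                          /* table itself unreasonably large */
    }
    return offsets_size;
}

int main(void)
{
    /* ~16 TiB image at 256 KiB blocks: 512 MiB offsets[] table, accepted. */
    printf("%llu\n", (unsigned long long)
           offsets_table_size((64u << 20) - 1, 256 * 1024));
    /* A header claiming 4G-1 blocks is rejected before any allocation. */
    printf("%llu\n", (unsigned long long)
           offsets_table_size(UINT32_MAX, 512));
    return 0;
}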


@@ -134,6 +134,11 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
if (!s || !s->orig_buf)
goto read_end;
if (s->buf_off >= s->buf_len) {
/* buffer full, read nothing */
return 0;
}
realsize = MIN(realsize, s->buf_len - s->buf_off);
memcpy(s->orig_buf + s->buf_off, ptr, realsize);
s->buf_off += realsize;

block/dictzip.c (new file, 572 lines)

@@ -0,0 +1,572 @@
/*
* DictZip Block driver for dictzip enabled gzip files
*
* Use the "dictzip" tool from the "dictd" package to create gzip files that
* contain the extra DictZip headers.
*
* dictzip(1) is a compression program which creates compressed files in the
* gzip format (see RFC 1952). However, unlike gzip(1), dictzip(1) compresses
* the file in pieces and stores an index to the pieces in the gzip header.
* This allows random access to the file at the granularity of the compressed
* pieces (currently about 64kB) while maintaining good compression ratios
* (within 5% of the expected ratio for dictionary data).
* dictd(8) uses files stored in this format.
*
* For details on DictZip see http://dict.org/.
*
* Copyright (c) 2009 Alexander Graf <agraf@suse.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include <zlib.h>
// #define DEBUG
#ifdef DEBUG
#define dprintf(fmt, ...) do { printf("dzip: " fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) do { } while (0)
#endif
#define SECTOR_SIZE 512
#define Z_STREAM_COUNT 4
#define CACHE_COUNT 20
/* magic values */
#define GZ_MAGIC1 0x1f
#define GZ_MAGIC2 0x8b
#define DZ_MAGIC1 'R'
#define DZ_MAGIC2 'A'
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
#define GZ_FNAME 0x08 /* Original name */
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
#define GZ_FHCRC 0x02 /* Header CRC16 */
/* offsets */
#define GZ_ID 0 /* GZ_MAGIC (16bit) */
#define GZ_FLG 3 /* FLaGs (see above) */
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
#define GZ_SI 12 /* Subfield ID (16bit) */
#define GZ_VERSION 16 /* Version for subfield format */
#define GZ_CHUNKSIZE 18 /* Chunk size (16bit) */
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
#define GZ_RNDDATA 22 /* Random access data (16bit) */
#define GZ_99_CHUNKSIZE 18 /* Chunk size (32bit) */
#define GZ_99_CHUNKCNT 22 /* Number of chunks (32bit) */
#define GZ_99_FILESIZE 26 /* Size of unpacked file (64bit) */
#define GZ_99_RNDDATA 34 /* Random access data (32bit) */
struct BDRVDictZipState;
typedef struct DictZipAIOCB {
BlockDriverAIOCB common;
struct BDRVDictZipState *s;
QEMUIOVector *qiov; /* QIOV of the original request */
QEMUIOVector *qiov_gz; /* QIOV of the gz subrequest */
QEMUBH *bh; /* BH for cache */
z_stream *zStream; /* stream to use for decoding */
int zStream_id; /* stream id of the above pointer */
size_t start; /* offset into the uncompressed file */
size_t len; /* uncompressed bytes to read */
uint8_t *gzipped; /* the gzipped data */
uint8_t *buf; /* cached result */
size_t gz_len; /* amount of gzip data */
size_t gz_start; /* uncompressed starting point of gzip data */
uint64_t offset; /* offset for "start" into the uncompressed chunk */
int chunks_len; /* amount of uncompressed data in all gzip data */
} DictZipAIOCB;
typedef struct dict_cache {
size_t start;
size_t len;
uint8_t *buf;
} DictCache;
typedef struct BDRVDictZipState {
BlockDriverState *hd;
z_stream zStream[Z_STREAM_COUNT];
DictCache cache[CACHE_COUNT];
int cache_index;
uint8_t stream_in_use;
uint64_t chunk_len;
uint32_t chunk_cnt;
uint16_t *chunks;
uint32_t *chunks32;
uint64_t *offsets;
int64_t file_len;
} BDRVDictZipState;
static int dictzip_probe(const uint8_t *buf, int buf_size, const char *filename)
{
if (buf_size < 2)
return 0;
/* We match on every gzip file */
if ((buf[0] == GZ_MAGIC1) && (buf[1] == GZ_MAGIC2))
return 100;
return 0;
}
static int start_zStream(z_stream *zStream)
{
zStream->zalloc = NULL;
zStream->zfree = NULL;
zStream->opaque = NULL;
zStream->next_in = 0;
zStream->avail_in = 0;
zStream->next_out = NULL;
zStream->avail_out = 0;
return inflateInit2( zStream, -15 );
}
static int dictzip_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVDictZipState *s = bs->opaque;
const char *err = "Unknown (read error?)";
uint8_t magic[2];
char buf[100];
uint8_t header_flags;
uint16_t chunk_len16;
uint16_t chunk_cnt16;
uint32_t chunk_len32;
uint16_t header_ver;
uint16_t tmp_short;
uint64_t offset;
int chunks_len;
int headerLength = GZ_XLEN - 1;
int rnd_offs;
int ret;
int i;
const char *fname = filename;
if (!strncmp(filename, "dzip://", 7))
fname += 7;
else if (!strncmp(filename, "dzip:", 5))
fname += 5;
ret = bdrv_file_open(&s->hd, fname, flags);
if (ret < 0)
return ret;
/* initialize zlib streams */
for (i = 0; i < Z_STREAM_COUNT; i++) {
if (start_zStream( &s->zStream[i] ) != Z_OK) {
err = s->zStream[i].msg;
goto fail;
}
}
/* gzip header */
if (bdrv_pread(s->hd, GZ_ID, &magic, sizeof(magic)) != sizeof(magic))
goto fail;
if (!((magic[0] == GZ_MAGIC1) && (magic[1] == GZ_MAGIC2))) {
err = "No gzip file";
goto fail;
}
/* dzip header */
if (bdrv_pread(s->hd, GZ_FLG, &header_flags, 1) != 1)
goto fail;
if (!(header_flags & GZ_FEXTRA)) {
err = "Not a dictzip file (wrong flags)";
goto fail;
}
/* extra length */
if (bdrv_pread(s->hd, GZ_XLEN, &tmp_short, 2) != 2)
goto fail;
headerLength += le16_to_cpu(tmp_short) + 2;
/* DictZip magic */
if (bdrv_pread(s->hd, GZ_SI, &magic, 2) != 2)
goto fail;
if (magic[0] != DZ_MAGIC1 || magic[1] != DZ_MAGIC2) {
err = "Not a dictzip file (missing extra magic)";
goto fail;
}
/* DictZip version */
if (bdrv_pread(s->hd, GZ_VERSION, &header_ver, 2) != 2)
goto fail;
header_ver = le16_to_cpu(header_ver);
switch (header_ver) {
case 1: /* Normal DictZip */
/* number of chunks */
if (bdrv_pread(s->hd, GZ_CHUNKSIZE, &chunk_len16, 2) != 2)
goto fail;
s->chunk_len = le16_to_cpu(chunk_len16);
/* chunk count */
if (bdrv_pread(s->hd, GZ_CHUNKCNT, &chunk_cnt16, 2) != 2)
goto fail;
s->chunk_cnt = le16_to_cpu(chunk_cnt16);
chunks_len = sizeof(short) * s->chunk_cnt;
rnd_offs = GZ_RNDDATA;
break;
case 99: /* Special Alex pigz version */
/* number of chunks */
if (bdrv_pread(s->hd, GZ_99_CHUNKSIZE, &chunk_len32, 4) != 4)
goto fail;
dprintf("chunk len [%#x] = %d\n", GZ_99_CHUNKSIZE, chunk_len32);
s->chunk_len = le32_to_cpu(chunk_len32);
/* chunk count */
if (bdrv_pread(s->hd, GZ_99_CHUNKCNT, &s->chunk_cnt, 4) != 4)
goto fail;
s->chunk_cnt = le32_to_cpu(s->chunk_cnt);
dprintf("chunk len | count = %d | %d\n", s->chunk_len, s->chunk_cnt);
/* file size */
if (bdrv_pread(s->hd, GZ_99_FILESIZE, &s->file_len, 8) != 8)
goto fail;
s->file_len = le64_to_cpu(s->file_len);
chunks_len = sizeof(int) * s->chunk_cnt;
rnd_offs = GZ_99_RNDDATA;
break;
default:
err = "Invalid DictZip version";
goto fail;
}
/* random access data */
s->chunks = g_malloc(chunks_len);
if (header_ver == 99)
s->chunks32 = (uint32_t *)s->chunks;
if (bdrv_pread(s->hd, rnd_offs, s->chunks, chunks_len) != chunks_len)
goto fail;
/* orig filename */
if (header_flags & GZ_FNAME) {
if (bdrv_pread(s->hd, headerLength + 1, buf, sizeof(buf)) != sizeof(buf))
goto fail;
buf[sizeof(buf) - 1] = '\0';
headerLength += strlen(buf) + 1;
if (strlen(buf) == sizeof(buf))
goto fail;
dprintf("filename: %s\n", buf);
}
/* comment field */
if (header_flags & GZ_COMMENT) {
if (bdrv_pread(s->hd, headerLength, buf, sizeof(buf)) != sizeof(buf))
goto fail;
buf[sizeof(buf) - 1] = '\0';
headerLength += strlen(buf) + 1;
if (strlen(buf) == sizeof(buf))
goto fail;
dprintf("comment: %s\n", buf);
}
if (header_flags & GZ_FHCRC)
headerLength += 2;
/* uncompressed file length*/
if (!s->file_len) {
uint32_t file_len;
if (bdrv_pread(s->hd, bdrv_getlength(s->hd) - 4, &file_len, 4) != 4)
goto fail;
s->file_len = le32_to_cpu(file_len);
}
/* compute offsets */
s->offsets = g_malloc(sizeof( *s->offsets ) * s->chunk_cnt);
for (offset = headerLength + 1, i = 0; i < s->chunk_cnt; i++) {
s->offsets[i] = offset;
switch (header_ver) {
case 1:
offset += le16_to_cpu(s->chunks[i]);
break;
case 99:
offset += le32_to_cpu(s->chunks32[i]);
break;
}
dprintf("chunk %#x - %#x = offset %#x -> %#x\n", i * s->chunk_len, (i+1) * s->chunk_len, s->offsets[i], offset);
}
return 0;
fail:
fprintf(stderr, "DictZip: Error opening file: %s\n", err);
bdrv_delete(s->hd);
if (s->chunks)
g_free(s->chunks);
return -EINVAL;
}
/* This callback gets invoked when we have the result in cache already */
static void dictzip_cache_cb(void *opaque)
{
DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
qemu_iovec_from_buf(acb->qiov, 0, acb->buf, acb->len);
acb->common.cb(acb->common.opaque, 0);
qemu_bh_delete(acb->bh);
qemu_aio_release(acb);
}
/* This callback gets invoked by the underlying block reader when we have
* all compressed data. We uncompress in here. */
static void dictzip_read_cb(void *opaque, int ret)
{
DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
struct BDRVDictZipState *s = acb->s;
uint8_t *buf;
DictCache *cache;
int r, i;
buf = g_malloc(acb->chunks_len);
/* try to find zlib stream for decoding */
do {
for (i = 0; i < Z_STREAM_COUNT; i++) {
if (!(s->stream_in_use & (1 << i))) {
s->stream_in_use |= (1 << i);
acb->zStream_id = i;
acb->zStream = &s->zStream[i];
break;
}
}
} while(!acb->zStream);
/* sure, we could handle more streams, but this callback should be single
threaded and when it's not, we really want to know! */
assert(i == 0);
/* uncompress the chunk */
acb->zStream->next_in = acb->gzipped;
acb->zStream->avail_in = acb->gz_len;
acb->zStream->next_out = buf;
acb->zStream->avail_out = acb->chunks_len;
r = inflate( acb->zStream, Z_PARTIAL_FLUSH );
if ( (r != Z_OK) && (r != Z_STREAM_END) )
fprintf(stderr, "Error inflating: [%d] %s\n", r, acb->zStream->msg);
if ( r == Z_STREAM_END )
inflateReset(acb->zStream);
dprintf("inflating [%d] left: %d | %d bytes\n", r, acb->zStream->avail_in, acb->zStream->avail_out);
s->stream_in_use &= ~(1 << acb->zStream_id);
/* notify the caller */
qemu_iovec_from_buf(acb->qiov, 0, buf + acb->offset, acb->len);
acb->common.cb(acb->common.opaque, 0);
/* fill the cache */
cache = &s->cache[s->cache_index];
s->cache_index++;
if (s->cache_index == CACHE_COUNT)
s->cache_index = 0;
cache->len = 0;
if (cache->buf)
g_free(cache->buf);
cache->start = acb->gz_start;
cache->buf = buf;
cache->len = acb->chunks_len;
/* free occupied resources */
g_free(acb->qiov_gz);
qemu_aio_release(acb);
}
static void dictzip_aio_cancel(BlockDriverAIOCB *blockacb)
{
}
static const AIOCBInfo dictzip_aiocb_info = {
.aiocb_size = sizeof(DictZipAIOCB),
.cancel = dictzip_aio_cancel,
};
/* This is where we get a request from a caller to read something */
static BlockDriverAIOCB *dictzip_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVDictZipState *s = bs->opaque;
DictZipAIOCB *acb;
QEMUIOVector *qiov_gz;
struct iovec *iov;
uint8_t *buf;
size_t start = sector_num * SECTOR_SIZE;
size_t len = nb_sectors * SECTOR_SIZE;
size_t end = start + len;
size_t gz_start;
size_t gz_len;
int64_t gz_sector_num;
int gz_nb_sectors;
int first_chunk, last_chunk;
int first_offset;
int i;
acb = qemu_aio_get(&dictzip_aiocb_info, bs, cb, opaque);
if (!acb)
return NULL;
/* Search Cache */
for (i = 0; i < CACHE_COUNT; i++) {
if (!s->cache[i].len)
continue;
if ((start >= s->cache[i].start) &&
(end <= (s->cache[i].start + s->cache[i].len))) {
acb->buf = s->cache[i].buf + (start - s->cache[i].start);
acb->len = len;
acb->qiov = qiov;
acb->bh = qemu_bh_new(dictzip_cache_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
}
}
/* No cache, so let's decode */
/* We need to read these chunks */
first_chunk = start / s->chunk_len;
first_offset = start - first_chunk * s->chunk_len;
last_chunk = end / s->chunk_len;
gz_start = s->offsets[first_chunk];
gz_len = 0;
for (i = first_chunk; i <= last_chunk; i++) {
if (s->chunks32)
gz_len += le32_to_cpu(s->chunks32[i]);
else
gz_len += le16_to_cpu(s->chunks[i]);
}
gz_sector_num = gz_start / SECTOR_SIZE;
gz_nb_sectors = (gz_len / SECTOR_SIZE);
/* account for tail and heads */
while ((gz_start + gz_len) > ((gz_sector_num + gz_nb_sectors) * SECTOR_SIZE))
gz_nb_sectors++;
/* Allocate qiov, iov and buf in one chunk so we only need to free qiov */
qiov_gz = g_malloc0(sizeof(QEMUIOVector) + sizeof(struct iovec) +
(gz_nb_sectors * SECTOR_SIZE));
iov = (struct iovec *)(((char *)qiov_gz) + sizeof(QEMUIOVector));
buf = ((uint8_t *)iov) + sizeof(struct iovec *);
/* Kick off the read by the backing file, so we can start decompressing */
iov->iov_base = (void *)buf;
iov->iov_len = gz_nb_sectors * 512;
qemu_iovec_init_external(qiov_gz, iov, 1);
dprintf("read %d - %d => %d - %d\n", start, end, gz_start, gz_start + gz_len);
acb->s = s;
acb->qiov = qiov;
acb->qiov_gz = qiov_gz;
acb->start = start;
acb->len = len;
acb->gzipped = buf + (gz_start % SECTOR_SIZE);
acb->gz_len = gz_len;
acb->gz_start = first_chunk * s->chunk_len;
acb->offset = first_offset;
acb->chunks_len = (last_chunk - first_chunk + 1) * s->chunk_len;
return bdrv_aio_readv(s->hd, gz_sector_num, qiov_gz, gz_nb_sectors,
dictzip_read_cb, acb);
}
static void dictzip_close(BlockDriverState *bs)
{
BDRVDictZipState *s = bs->opaque;
int i;
for (i = 0; i < CACHE_COUNT; i++) {
if (!s->cache[i].len)
continue;
g_free(s->cache[i].buf);
}
for (i = 0; i < Z_STREAM_COUNT; i++) {
inflateEnd(&s->zStream[i]);
}
if (s->chunks)
g_free(s->chunks);
if (s->offsets)
g_free(s->offsets);
dprintf("Close\n");
}
static int64_t dictzip_getlength(BlockDriverState *bs)
{
BDRVDictZipState *s = bs->opaque;
dprintf("getlength -> %ld\n", s->file_len);
return s->file_len;
}
static BlockDriver bdrv_dictzip = {
.format_name = "dzip",
.protocol_name = "dzip",
.instance_size = sizeof(BDRVDictZipState),
.bdrv_file_open = dictzip_open,
.bdrv_close = dictzip_close,
.bdrv_getlength = dictzip_getlength,
.bdrv_probe = dictzip_probe,
.bdrv_aio_readv = dictzip_aio_readv,
};
static void dictzip_block_init(void)
{
bdrv_register(&bdrv_dictzip);
}
block_init(dictzip_block_init);
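dictzip_aio_readv() above maps a byte range of the uncompressed file onto a run of compressed chunks using the offsets computed in dictzip_open(): every chunk holds chunk_len uncompressed bytes, so the chunk index and the in-chunk skip fall out of simple division. A self-contained sketch of that mapping with hypothetical names and a uniform chunk size:

#include <stdint.h>
#include <stdio.h>

/* Map an uncompressed byte range [start, start + len) onto the chunk table.
 * offsets[i] is the file offset of chunk i's compressed data, sizes[i] its
 * compressed length, chunk_len the uncompressed size of every chunk. */
static void map_range(uint64_t start, uint64_t len, uint64_t chunk_len,
                      const uint64_t *offsets, const uint32_t *sizes,
                      uint64_t *gz_start, uint64_t *gz_len, uint64_t *skip)
{
    uint64_t first_chunk = start / chunk_len;
    uint64_t last_chunk = (start + len) / chunk_len;
    uint64_t i;

    *skip = start - first_chunk * chunk_len;  /* offset inside first chunk */
    *gz_start = offsets[first_chunk];         /* where to start reading */
    *gz_len = 0;
    for (i = first_chunk; i <= last_chunk; i++) {
        *gz_len += sizes[i];                  /* compressed bytes to fetch */
    }
}

int main(void)
{
    /* Three 64 KiB chunks compressed to 10000/12000/8000 bytes, with the
     * compressed data starting 20 bytes into the file. */
    uint64_t offsets[] = { 20, 10020, 22020 };
    uint32_t sizes[]   = { 10000, 12000, 8000 };
    uint64_t gz_start, gz_len, skip;

    /* Read 4 KiB starting 1 KiB into the second chunk. */
    map_range(64 * 1024 + 1024, 4096, 64 * 1024,
              offsets, sizes, &gz_start, &gz_len, &skip);
    printf("gz_start=%llu gz_len=%llu skip=%llu\n",
           (unsigned long long)gz_start, (unsigned long long)gz_len,
           (unsigned long long)skip);             /* 10020, 12000, 1024 */
    return 0;
}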


@@ -27,6 +27,14 @@
#include "qemu/module.h"
#include <zlib.h>
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
* or truncating when converting to 32-bit types
*/
DMG_LENGTHS_MAX = 64 * 1024 * 1024, /* 64 MB */
DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
};
typedef struct BDRVDMGState {
CoMutex lock;
/* each chunk contains a certain number of sectors,
@@ -85,12 +93,43 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
return 0;
}
/* Increase max chunk sizes, if necessary. This function is used to calculate
* the buffer sizes needed for compressed/uncompressed chunk I/O.
*/
static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
uint32_t *max_compressed_size,
uint32_t *max_sectors_per_chunk)
{
uint32_t compressed_size = 0;
uint32_t uncompressed_sectors = 0;
switch (s->types[chunk]) {
case 0x80000005: /* zlib compressed */
compressed_size = s->lengths[chunk];
uncompressed_sectors = s->sectorcounts[chunk];
break;
case 1: /* copy */
uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
break;
case 2: /* zero */
uncompressed_sectors = s->sectorcounts[chunk];
break;
}
if (compressed_size > *max_compressed_size) {
*max_compressed_size = compressed_size;
}
if (uncompressed_sectors > *max_sectors_per_chunk) {
*max_sectors_per_chunk = uncompressed_sectors;
}
}
static int dmg_open(BlockDriverState *bs, int flags)
{
BDRVDMGState *s = bs->opaque;
uint64_t info_begin,info_end,last_in_offset,last_out_offset;
uint64_t info_begin, info_end, last_in_offset, last_out_offset;
uint32_t count, tmp;
uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
int64_t offset;
int ret;
@@ -152,37 +191,40 @@ static int dmg_open(BlockDriverState *bs, int flags)
goto fail;
}
if (type == 0x6d697368 && count >= 244) {
int new_size, chunk_count;
if (type == 0x6d697368 && count >= 244) {
size_t new_size;
uint32_t chunk_count;
offset += 4;
offset += 200;
chunk_count = (count-204)/40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = g_realloc(s->types, new_size/2);
s->offsets = g_realloc(s->offsets, new_size);
s->lengths = g_realloc(s->lengths, new_size);
s->sectors = g_realloc(s->sectors, new_size);
s->sectorcounts = g_realloc(s->sectorcounts, new_size);
chunk_count = (count - 204) / 40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = g_realloc(s->types, new_size / 2);
s->offsets = g_realloc(s->offsets, new_size);
s->lengths = g_realloc(s->lengths, new_size);
s->sectors = g_realloc(s->sectors, new_size);
s->sectorcounts = g_realloc(s->sectorcounts, new_size);
for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
ret = read_uint32(bs, offset, &s->types[i]);
if (ret < 0) {
goto fail;
}
offset += 4;
if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
if(s->types[i]==0xffffffff) {
last_in_offset = s->offsets[i-1]+s->lengths[i-1];
last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1];
}
chunk_count--;
i--;
offset += 36;
continue;
}
offset += 4;
offset += 4;
if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
s->types[i] != 2) {
if (s->types[i] == 0xffffffff && i > 0) {
last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
last_out_offset = s->sectors[i - 1] +
s->sectorcounts[i - 1];
}
chunk_count--;
i--;
offset += 36;
continue;
}
offset += 4;
ret = read_uint64(bs, offset, &s->sectors[i]);
if (ret < 0) {
@@ -197,6 +239,14 @@ static int dmg_open(BlockDriverState *bs, int flags)
}
offset += 8;
if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
error_report("sector count %" PRIu64 " for chunk %u is "
"larger than max (%u)",
s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
ret = -EINVAL;
goto fail;
}
ret = read_uint64(bs, offset, &s->offsets[i]);
if (ret < 0) {
goto fail;
@@ -210,19 +260,25 @@ static int dmg_open(BlockDriverState *bs, int flags)
}
offset += 8;
if(s->lengths[i]>max_compressed_size)
max_compressed_size = s->lengths[i];
if(s->sectorcounts[i]>max_sectors_per_chunk)
max_sectors_per_chunk = s->sectorcounts[i];
}
s->n_chunks+=chunk_count;
}
if (s->lengths[i] > DMG_LENGTHS_MAX) {
error_report("length %" PRIu64 " for chunk %u is larger "
"than max (%u)",
s->lengths[i], i, DMG_LENGTHS_MAX);
ret = -EINVAL;
goto fail;
}
update_max_chunk_size(s, i, &max_compressed_size,
&max_sectors_per_chunk);
}
s->n_chunks += chunk_count;
}
}
/* initialize zlib engine */
s->compressed_chunk = g_malloc(max_compressed_size+1);
s->uncompressed_chunk = g_malloc(512*max_sectors_per_chunk);
if(inflateInit(&s->zstream) != Z_OK) {
s->compressed_chunk = g_malloc(max_compressed_size + 1);
s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
}
@@ -244,83 +300,82 @@ fail:
}
static inline int is_sector_in_chunk(BDRVDMGState* s,
uint32_t chunk_num,int sector_num)
uint32_t chunk_num, uint64_t sector_num)
{
if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num ||
s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num)
return 0;
else
return -1;
if (chunk_num >= s->n_chunks || s->sectors[chunk_num] > sector_num ||
s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) {
return 0;
} else {
return -1;
}
}
static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num)
{
/* binary search */
uint32_t chunk1=0,chunk2=s->n_chunks,chunk3;
while(chunk1!=chunk2) {
chunk3 = (chunk1+chunk2)/2;
if(s->sectors[chunk3]>sector_num)
chunk2 = chunk3;
else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num)
return chunk3;
else
chunk1 = chunk3;
uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3;
while (chunk1 != chunk2) {
chunk3 = (chunk1 + chunk2) / 2;
if (s->sectors[chunk3] > sector_num) {
chunk2 = chunk3;
} else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) {
return chunk3;
} else {
chunk1 = chunk3;
}
}
return s->n_chunks; /* error */
}
static inline int dmg_read_chunk(BlockDriverState *bs, int sector_num)
static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
{
BDRVDMGState *s = bs->opaque;
if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
int ret;
uint32_t chunk = search_chunk(s,sector_num);
if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
int ret;
uint32_t chunk = search_chunk(s, sector_num);
if(chunk>=s->n_chunks)
return -1;
if (chunk >= s->n_chunks) {
return -1;
}
s->current_chunk = s->n_chunks;
switch(s->types[chunk]) {
case 0x80000005: { /* zlib compressed */
int i;
s->current_chunk = s->n_chunks;
switch (s->types[chunk]) {
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
}
/* we need to buffer, because only the chunk as whole can be
* inflated. */
i=0;
do {
ret = bdrv_pread(bs->file, s->offsets[chunk] + i,
s->compressed_chunk+i, s->lengths[chunk]-i);
if(ret<0 && errno==EINTR)
ret=0;
i+=ret;
} while(ret>=0 && ret+i<s->lengths[chunk]);
if (ret != s->lengths[chunk])
return -1;
s->zstream.next_in = s->compressed_chunk;
s->zstream.avail_in = s->lengths[chunk];
s->zstream.next_out = s->uncompressed_chunk;
s->zstream.avail_out = 512*s->sectorcounts[chunk];
ret = inflateReset(&s->zstream);
if(ret != Z_OK)
return -1;
ret = inflate(&s->zstream, Z_FINISH);
if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk])
return -1;
break; }
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->zstream.next_in = s->compressed_chunk;
s->zstream.avail_in = s->lengths[chunk];
s->zstream.next_out = s->uncompressed_chunk;
s->zstream.avail_out = 512 * s->sectorcounts[chunk];
ret = inflateReset(&s->zstream);
if (ret != Z_OK) {
return -1;
}
ret = inflate(&s->zstream, Z_FINISH);
if (ret != Z_STREAM_END ||
s->zstream.total_out != 512 * s->sectorcounts[chunk]) {
return -1;
}
break; }
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk])
return -1;
break;
case 2: /* zero */
memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]);
break;
}
s->current_chunk = chunk;
if (ret != s->lengths[chunk]) {
return -1;
}
break;
case 2: /* zero */
memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
break;
}
s->current_chunk = chunk;
}
return 0;
}
@@ -331,12 +386,14 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
BDRVDMGState *s = bs->opaque;
int i;
for(i=0;i<nb_sectors;i++) {
uint32_t sector_offset_in_chunk;
if(dmg_read_chunk(bs, sector_num+i) != 0)
return -1;
sector_offset_in_chunk = sector_num+i-s->sectors[s->current_chunk];
memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512);
for (i = 0; i < nb_sectors; i++) {
uint32_t sector_offset_in_chunk;
if (dmg_read_chunk(bs, sector_num + i) != 0) {
return -1;
}
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
memcpy(buf + i * 512,
s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
}
return 0;
}
@@ -368,12 +425,12 @@ static void dmg_close(BlockDriverState *bs)
}
static BlockDriver bdrv_dmg = {
.format_name = "dmg",
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_read = dmg_co_read,
.bdrv_close = dmg_close,
.format_name = "dmg",
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_read = dmg_co_read,
.bdrv_close = dmg_close,
};
static void bdrv_dmg_init(void)
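search_chunk() above is a binary search over chunks sorted by starting sector; it returns the chunk whose [sectors, sectors + sectorcounts) range contains the requested sector, or n_chunks on a miss. A standalone sketch of that lookup follows (names are placeholders, and the lower bound is advanced past the midpoint so the loop also terminates when the sector falls in a gap or beyond the last chunk):

#include <stdint.h>
#include <stdio.h>

/* Find the chunk containing sector_num, or n_chunks if none does.
 * sectors[] holds each chunk's first sector and counts[] its length;
 * chunks are sorted by sectors[] and do not overlap. */
static uint32_t find_chunk(const uint64_t *sectors, const uint64_t *counts,
                           uint32_t n_chunks, uint64_t sector_num)
{
    uint32_t lo = 0, hi = n_chunks, mid;

    while (lo < hi) {
        mid = (lo + hi) / 2;
        if (sectors[mid] > sector_num) {
            hi = mid;                              /* target is to the left */
        } else if (sectors[mid] + counts[mid] > sector_num) {
            return mid;                            /* inside this chunk */
        } else {
            lo = mid + 1;                          /* target is to the right */
        }
    }
    return n_chunks;                               /* no chunk covers it */
}

int main(void)
{
    uint64_t sectors[] = { 0, 128, 1024 };
    uint64_t counts[]  = { 128, 896, 512 };

    printf("%u\n", find_chunk(sectors, counts, 3, 130));    /* -> 1 */
    printf("%u\n", find_chunk(sectors, counts, 3, 2000));   /* -> 3, miss */
    return 0;
}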


@@ -274,7 +274,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
return -EIO;
rc = -EIO;
}
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
@@ -350,7 +350,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
socket_set_nonblock(sock);
qemu_set_nonblock(sock);
qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
nbd_have_request, s);


@@ -49,9 +49,9 @@ typedef struct BDRVParallelsState {
CoMutex lock;
uint32_t *catalog_bitmap;
int catalog_size;
unsigned int catalog_size;
int tracks;
unsigned int tracks;
} BDRVParallelsState;
static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -91,8 +91,19 @@ static int parallels_open(BlockDriverState *bs, int flags)
bs->total_sectors = le32_to_cpu(ph.nb_sectors);
s->tracks = le32_to_cpu(ph.tracks);
if (s->tracks == 0) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Invalid image: Zero sectors per track");
ret = -EINVAL;
goto fail;
}
s->catalog_size = le32_to_cpu(ph.catalog_entries);
if (s->catalog_size > INT_MAX / 4) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Catalog too large");
ret = -EFBIG;
goto fail;
}
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);


@@ -60,7 +60,7 @@ typedef struct BDRVQcowState {
int cluster_sectors;
int l2_bits;
int l2_size;
int l1_size;
unsigned int l1_size;
uint64_t cluster_offset_mask;
uint64_t l1_table_offset;
uint64_t *l1_table;
@@ -124,10 +124,28 @@ static int qcow_open(BlockDriverState *bs, int flags)
goto fail;
}
if (header.size <= 1 || header.cluster_bits < 9) {
if (header.size <= 1) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Image size is too small (must be at least 2 bytes)");
ret = -EINVAL;
goto fail;
}
if (header.cluster_bits < 9 || header.cluster_bits > 16) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Cluster size must be between 512 and 64k");
ret = -EINVAL;
goto fail;
}
/* l2_bits specifies number of entries; storing a uint64_t in each entry,
* so bytes = num_entries << 3. */
if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"L2 table size must be between 512 and 64k");
ret = -EINVAL;
goto fail;
}
if (header.crypt_method > QCOW_CRYPT_AES) {
ret = -EINVAL;
goto fail;
@@ -146,7 +164,19 @@ static int qcow_open(BlockDriverState *bs, int flags)
/* read the level 1 table */
shift = s->cluster_bits + s->l2_bits;
s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
if (header.size > UINT64_MAX - (1LL << shift)) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Image too large");
ret = -EINVAL;
goto fail;
} else {
uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift;
if (l1_size > INT_MAX / sizeof(uint64_t)) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Image too large");
ret = -EINVAL;
goto fail;
}
s->l1_size = l1_size;
}
s->l1_table_offset = header.l1_table_offset;
s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
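In the qcow (v1) open path above, each L1 entry covers 2^(cluster_bits + l2_bits) guest bytes, so l1_size is size divided by 2^shift rounded up; the new checks keep both the rounding addition and the later l1_size * sizeof(uint64_t) allocation from overflowing. A short worked sketch with illustrative values:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Number of L1 entries needed for an image of 'size' bytes, or 0 if the
 * header values would overflow the computation or the table allocation. */
static uint64_t l1_entries(uint64_t size, int cluster_bits, int l2_bits)
{
    int shift = cluster_bits + l2_bits;    /* guest bytes per L1 entry */
    uint64_t l1_size;

    if (size > UINT64_MAX - (1ULL << shift)) {
        return 0;                          /* size + (1 << shift) would wrap */
    }
    l1_size = (size + (1ULL << shift) - 1) >> shift;
    if (l1_size > INT_MAX / sizeof(uint64_t)) {
        return 0;                          /* L1 table allocation too large */
    }
    return l1_size;
}

int main(void)
{
    /* 4 KiB clusters (cluster_bits = 12), 512-entry L2 tables (l2_bits = 9):
     * each L1 entry covers 2 MiB, so a 1 GiB image needs 512 entries. */
    printf("%llu\n", (unsigned long long)l1_entries(1ULL << 30, 12, 9));
    /* A near-2^64 size from a corrupted header is rejected. */
    printf("%llu\n", (unsigned long long)l1_entries(UINT64_MAX - 1, 12, 9));
    return 0;
}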


@@ -29,12 +29,13 @@
#include "block/qcow2.h"
#include "trace.h"
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size)
{
BDRVQcowState *s = bs->opaque;
int new_l1_size, new_l1_size2, ret, i;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
int64_t new_l1_table_offset;
int64_t new_l1_table_offset, new_l1_size;
uint8_t data[12];
if (min_size <= s->l1_size)
@@ -53,8 +54,13 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
}
}
if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
return -EFBIG;
}
#ifdef DEBUG_ALLOC2
fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
s->l1_size, new_l1_size);
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
@@ -324,15 +330,6 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
struct iovec iov;
int n, ret;
/*
* If this is the last cluster and it is only partially used, we must only
* copy until the end of the image, or bdrv_check_request will fail for the
* bdrv_read/write calls below.
*/
if (start_sect + n_end > bs->total_sectors) {
n_end = bs->total_sectors - start_sect;
}
n = n_end - n_start;
if (n <= 0) {
return 0;
@@ -391,8 +388,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset)
{
BDRVQcowState *s = bs->opaque;
unsigned int l1_index, l2_index;
uint64_t l2_offset, *l2_table;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
unsigned int index_in_cluster, nb_clusters;
uint64_t nb_available, nb_needed;
@@ -454,6 +451,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
*cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
break;
case QCOW2_CLUSTER_ZERO:
if (s->qcow_version < 3) {
return -EIO;
}
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], 0,
QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
@@ -504,8 +504,8 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
unsigned int l1_index, l2_index;
uint64_t l2_offset;
unsigned int l2_index;
uint64_t l1_index, l2_offset;
uint64_t *l2_table = NULL;
int ret;
@@ -519,6 +519,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
}
}
assert(l1_index < s->l1_size);
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
/* seek the l2 table of the given l2 offset */


@@ -26,7 +26,7 @@
#include "block/block_int.h"
#include "block/qcow2.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length,
int addend);
@@ -38,8 +38,10 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int qcow2_refcount_init(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
int ret, refcount_table_size2, i;
unsigned int refcount_table_size2, i;
int ret;
assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
s->refcount_table = g_malloc(refcount_table_size2);
if (s->refcount_table_size > 0) {
@@ -85,7 +87,7 @@ static int load_refcount_block(BlockDriverState *bs,
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
BDRVQcowState *s = bs->opaque;
int refcount_table_index, block_index;
uint64_t refcount_table_index, block_index;
int64_t refcount_block_offset;
int ret;
uint16_t *refcount_block;
@@ -189,10 +191,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
* they can describe them themselves.
*
* - We need to consider that at this point we are inside update_refcounts
* and doing the initial refcount increase. This means that some clusters
* have already been allocated by the caller, but their refcount isn't
* accurate yet. free_cluster_index tells us where this allocation ends
* as long as we don't overwrite it by freeing clusters.
* and potentially doing an initial refcount increase. This means that
* some clusters have already been allocated by the caller, but their
* refcount isn't accurate yet. If we allocate clusters for metadata, we
* need to return -EAGAIN to signal the caller that it needs to restart
* the search for free clusters.
*
* - alloc_clusters_noref and qcow2_free_clusters may load a different
* refcount block into the cache
@@ -201,7 +204,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
*refcount_block = NULL;
/* We write to the refcount table, so we might depend on L2 tables */
qcow2_cache_flush(bs, s->l2_table_cache);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
return ret;
}
/* Allocate the refcount block itself and mark it as used */
int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
@@ -237,7 +243,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
goto fail_block;
}
bdrv_flush(bs->file);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail_block;
}
/* Initialize the new refcount block only after updating its refcount,
* update_refcount uses the refcount cache itself */
@@ -270,7 +279,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
s->refcount_table[refcount_table_index] = new_block;
return 0;
/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
return -EAGAIN;
}
ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
@@ -293,8 +305,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Calculate the number of refcount blocks needed so far */
uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
uint64_t blocks_used = (s->free_cluster_index +
refcount_block_clusters - 1) / refcount_block_clusters;
uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
return -EFBIG;
}
/* And now we need at least one block more for the new metadata */
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
@@ -327,8 +342,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
s->refcount_table_size * sizeof(uint64_t));
@@ -391,17 +404,18 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset;
/* Free old table. Remember, we must not change free_cluster_index */
uint64_t old_free_cluster_index = s->free_cluster_index;
/* Free old table. */
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
s->free_cluster_index = old_free_cluster_index;
ret = load_refcount_block(bs, new_block, (void**) refcount_block);
if (ret < 0) {
return ret;
}
return 0;
/* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;
fail_table:
g_free(new_table);
@@ -539,15 +553,16 @@ static int update_cluster_refcount(BlockDriverState *bs,
/* return < 0 if error */
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
{
BDRVQcowState *s = bs->opaque;
int i, nb_clusters, refcount;
uint64_t i, nb_clusters;
int refcount;
nb_clusters = size_to_clusters(s, size);
retry:
for(i = 0; i < nb_clusters; i++) {
int64_t next_cluster_index = s->free_cluster_index++;
uint64_t next_cluster_index = s->free_cluster_index++;
refcount = get_refcount(bs, next_cluster_index);
if (refcount < 0) {
@@ -564,18 +579,21 @@ retry:
return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
}
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
{
int64_t offset;
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
do {
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
ret = update_refcount(bs, offset, size, 1);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, size, 1);
if (ret < 0) {
return ret;
}
@@ -588,32 +606,29 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_index;
uint64_t old_free_cluster_index;
int i, refcount, ret;
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
do {
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
}
}
}
/* And then allocate them */
old_free_cluster_index = s->free_cluster_index;
s->free_cluster_index = cluster_index + i;
/* And then allocate them */
ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
if (ret < 0) {
return ret;
}
s->free_cluster_index = old_free_cluster_index;
return i;
}
@@ -884,8 +899,7 @@ static void inc_refcounts(BlockDriverState *bs,
int64_t offset, int64_t size)
{
BDRVQcowState *s = bs->opaque;
int64_t start, last, cluster_offset;
int k;
uint64_t start, last, cluster_offset, k;
if (size <= 0)
return;
@@ -895,11 +909,7 @@ static void inc_refcounts(BlockDriverState *bs,
for(cluster_offset = start; cluster_offset <= last;
cluster_offset += s->cluster_size) {
k = cluster_offset >> s->cluster_bits;
if (k < 0) {
fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
cluster_offset);
res->corruptions++;
} else if (k >= refcount_table_size) {
if (k >= refcount_table_size) {
fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
"the end of the image file, can't properly check refcounts.\n",
cluster_offset);
@@ -1112,14 +1122,19 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
BDRVQcowState *s = bs->opaque;
int64_t size, i;
int nb_clusters, refcount1, refcount2;
int64_t size, i, nb_clusters;
int refcount1, refcount2;
QCowSnapshot *sn;
uint16_t *refcount_table;
int ret;
size = bdrv_getlength(bs->file);
nb_clusters = size_to_clusters(s, size);
if (nb_clusters > INT_MAX) {
res->check_errors++;
return -EFBIG;
}
refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
/* header */
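The long comment above explains why alloc_refcount_block() now returns -EAGAIN: the refcount block it allocates for its own bookkeeping may land in the very clusters the caller had just picked, so qcow2_alloc_clusters() simply restarts its search. The toy model below reproduces only that retry dance; nothing in it reflects qcow2's real on-disk layout.

#include <errno.h>
#include <stdio.h>

#define N_CLUSTERS 16

static int refcount[N_CLUSTERS];           /* toy stand-in for refcount blocks */
static int refblock_missing = 1;           /* one block must be created lazily */

/* First-fit search for n consecutive free clusters; -1 if none. */
static int alloc_noref(int n)
{
    for (int i = 0; i + n <= N_CLUSTERS; i++) {
        int free_run = 1;
        for (int j = 0; j < n; j++) {
            if (refcount[i + j]) {
                free_run = 0;
                break;
            }
        }
        if (free_run) {
            return i;
        }
    }
    return -1;
}

/* Bump refcounts for [first, first + n). If the backing refcount block does
 * not exist yet, grab a free cluster for it and ask the caller to retry,
 * mirroring the -EAGAIN path added in the hunk above. */
static int update_refcount(int first, int n)
{
    if (refblock_missing) {
        int rb = alloc_noref(1);
        if (rb >= 0) {
            refcount[rb] = 1;              /* metadata now occupies a cluster */
        }
        refblock_missing = 0;
        return -EAGAIN;                    /* caller must pick clusters again */
    }
    for (int j = 0; j < n; j++) {
        refcount[first + j]++;
    }
    return 0;
}

int main(void)
{
    int first, ret;

    do {
        first = alloc_noref(3);
        if (first < 0) {
            return 1;                      /* toy array full */
        }
        ret = update_refcount(first, 3);
    } while (ret == -EAGAIN);

    /* The first attempt picked cluster 0, but the refcount block took it;
     * the retry lands on clusters 1..3. */
    printf("allocated clusters %d..%d\n", first, first + 2);
    return 0;
}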


@@ -26,31 +26,6 @@
#include "block/block_int.h"
#include "block/qcow2.h"
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
void qcow2_free_snapshots(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
@@ -141,8 +116,14 @@ int qcow2_read_snapshots(BlockDriverState *bs)
}
offset += name_size;
sn->name[name_size] = '\0';
if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
ret = -EFBIG;
goto fail;
}
}
assert(offset - s->snapshots_offset <= INT_MAX);
s->snapshots_size = offset - s->snapshots_offset;
return 0;
@@ -163,7 +144,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
uint32_t nb_snapshots;
uint64_t snapshots_offset;
} QEMU_PACKED header_data;
int64_t offset, snapshots_offset;
int64_t offset, snapshots_offset = 0;
int ret;
/* compute the size of the snapshots */
@@ -175,16 +156,26 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
offset += sizeof(extra);
offset += strlen(sn->id_str);
offset += strlen(sn->name);
if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
ret = -EFBIG;
goto fail;
}
}
assert(offset <= INT_MAX);
snapshots_size = offset;
/* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
bdrv_flush(bs->file);
offset = snapshots_offset;
if (offset < 0) {
return offset;
}
ret = bdrv_flush(bs);
if (ret < 0) {
return ret;
}
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
@@ -322,6 +313,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
uint64_t *l1_table = NULL;
int64_t l1_table_offset;
if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
return -EFBIG;
}
memset(sn, 0, sizeof(*sn));
/* Generate an ID if it wasn't passed */
@@ -636,7 +631,11 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
sn = &s->snapshots[snapshot_index];
/* Allocate and read in the snapshot's L1 table */
new_l1_bytes = s->l1_size * sizeof(uint64_t);
if (sn->l1_size > QCOW_MAX_L1_SIZE) {
error_report("Snapshot L1 table too large");
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);


@@ -285,12 +285,40 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
return ret;
}
static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
uint64_t entries, size_t entry_len)
{
BDRVQcowState *s = bs->opaque;
uint64_t size;
/* Use signed INT64_MAX as the maximum even for uint64_t header fields,
* because values will be passed to qemu functions taking int64_t. */
if (entries > INT64_MAX / entry_len) {
return -EINVAL;
}
size = entries * entry_len;
if (INT64_MAX - size < offset) {
return -EINVAL;
}
/* Tables must be cluster aligned */
if (offset & (s->cluster_size - 1)) {
return -EINVAL;
}
return 0;
}
static int qcow2_open(BlockDriverState *bs, int flags)
{
BDRVQcowState *s = bs->opaque;
int len, i, ret = 0;
unsigned int len, i;
int ret = 0;
QCowHeader header;
uint64_t ext_end;
uint64_t l1_vm_state_index;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
@@ -322,6 +350,19 @@ static int qcow2_open(BlockDriverState *bs, int flags)
s->qcow_version = header.version;
/* Initialise cluster size */
if (header.cluster_bits < MIN_CLUSTER_BITS ||
header.cluster_bits > MAX_CLUSTER_BITS) {
report_unsupported(bs, "Unsupported cluster size: 2^%i",
header.cluster_bits);
ret = -EINVAL;
goto fail;
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
s->cluster_sectors = 1 << (s->cluster_bits - 9);
/* Initialise version 3 header fields */
if (header.version == 2) {
header.incompatible_features = 0;
@@ -335,6 +376,18 @@ static int qcow2_open(BlockDriverState *bs, int flags)
be64_to_cpus(&header.autoclear_features);
be32_to_cpus(&header.refcount_order);
be32_to_cpus(&header.header_length);
if (header.header_length < 104) {
report_unsupported(bs, "qcow2 header too short");
ret = -EINVAL;
goto fail;
}
}
if (header.header_length > s->cluster_size) {
report_unsupported(bs, "qcow2 header exceeds cluster size");
ret = -EINVAL;
goto fail;
}
if (header.header_length > sizeof(header)) {
@@ -347,6 +400,12 @@ static int qcow2_open(BlockDriverState *bs, int flags)
}
}
if (header.backing_file_offset > s->cluster_size) {
report_unsupported(bs, "Invalid backing file offset");
ret = -EINVAL;
goto fail;
}
if (header.backing_file_offset) {
ext_end = header.backing_file_offset;
} else {
@@ -377,11 +436,6 @@ static int qcow2_open(BlockDriverState *bs, int flags)
goto fail;
}
if (header.cluster_bits < MIN_CLUSTER_BITS ||
header.cluster_bits > MAX_CLUSTER_BITS) {
ret = -EINVAL;
goto fail;
}
if (header.crypt_method > QCOW_CRYPT_AES) {
ret = -EINVAL;
goto fail;
@@ -390,32 +444,77 @@ static int qcow2_open(BlockDriverState *bs, int flags)
if (s->crypt_method_header) {
bs->encrypted = 1;
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
s->cluster_sectors = 1 << (s->cluster_bits - 9);
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
s->l2_size = 1 << s->l2_bits;
bs->total_sectors = header.size / 512;
s->csize_shift = (62 - (s->cluster_bits - 8));
s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
s->refcount_table_offset = header.refcount_table_offset;
s->refcount_table_size =
header.refcount_table_clusters << (s->cluster_bits - 3);
s->snapshots_offset = header.snapshots_offset;
s->nb_snapshots = header.nb_snapshots;
if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) {
report_unsupported(bs, "Reference count table too large");
ret = -EINVAL;
goto fail;
}
ret = validate_table_offset(bs, s->refcount_table_offset,
s->refcount_table_size, sizeof(uint64_t));
if (ret < 0) {
report_unsupported(bs, "Invalid reference count table offset");
goto fail;
}
/* Snapshot table offset/length */
if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) {
report_unsupported(bs, "Too many snapshots");
ret = -EINVAL;
goto fail;
}
ret = validate_table_offset(bs, header.snapshots_offset,
header.nb_snapshots,
sizeof(QCowSnapshotHeader));
if (ret < 0) {
report_unsupported(bs, "Invalid snapshot table offset");
goto fail;
}
/* read the level 1 table */
if (header.l1_size > QCOW_MAX_L1_SIZE) {
report_unsupported(bs, "Active L1 table too large");
ret = -EFBIG;
goto fail;
}
s->l1_size = header.l1_size;
s->l1_vm_state_index = size_to_l1(s, header.size);
l1_vm_state_index = size_to_l1(s, header.size);
if (l1_vm_state_index > INT_MAX) {
ret = -EFBIG;
goto fail;
}
s->l1_vm_state_index = l1_vm_state_index;
/* the L1 table must contain at least enough entries to put
header.size bytes */
if (s->l1_size < s->l1_vm_state_index) {
ret = -EINVAL;
goto fail;
}
ret = validate_table_offset(bs, header.l1_table_offset,
header.l1_size, sizeof(uint64_t));
if (ret < 0) {
report_unsupported(bs, "Invalid L1 table offset");
goto fail;
}
s->l1_table_offset = header.l1_table_offset;
if (s->l1_size > 0) {
s->l1_table = g_malloc0(
align_offset(s->l1_size * sizeof(uint64_t), 512));
@@ -456,8 +555,10 @@ static int qcow2_open(BlockDriverState *bs, int flags)
/* read the backing file name */
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
if (len > 1023) {
len = 1023;
if (len > MIN(1023, s->cluster_size - header.backing_file_offset)) {
report_unsupported(bs, "Backing file name too long");
ret = -EINVAL;
goto fail;
}
ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->backing_file, len);
@@ -467,6 +568,10 @@ static int qcow2_open(BlockDriverState *bs, int flags)
bs->backing_file[len] = '\0';
}
/* Internal snapshots */
s->snapshots_offset = header.snapshots_offset;
s->nb_snapshots = header.nb_snapshots;
ret = qcow2_read_snapshots(bs);
if (ret < 0) {
goto fail;
@@ -584,7 +689,7 @@ static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
*pnum = 0;
}
return (cluster_offset != 0);
return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
}
/* handle reading after the end of the backing file */
@@ -665,10 +770,6 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
case QCOW2_CLUSTER_ZERO:
if (s->qcow_version < 3) {
ret = -EIO;
goto fail;
}
qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
break;
@@ -1205,7 +1306,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
BlockDriverState* bs;
QCowHeader header;
uint8_t* refcount_table;
uint64_t* refcount_table;
int ret;
ret = bdrv_create_file(filename, options);
@@ -1247,9 +1348,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
/* Write an empty refcount table */
refcount_table = g_malloc0(cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@@ -1271,7 +1373,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
if (ret < 0) {
goto out;
@@ -1433,7 +1535,8 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
{
BDRVQcowState *s = bs->opaque;
int ret, new_l1_size;
int64_t new_l1_size;
int ret;
if (offset & 511) {
error_report("The new size must be a multiple of 512");

View File

@@ -38,6 +38,19 @@
#define QCOW_CRYPT_AES 1
#define QCOW_MAX_CRYPT_CLUSTERS 32
#define QCOW_MAX_SNAPSHOTS 65536
/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_REFTABLE_SIZE 0x800000
/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_L1_SIZE 0x2000000
/* Allow for an average of 1k per snapshot table entry, should be plenty of
* space for snapshot names and IDs */
#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
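The coverage figures quoted in these comments can be checked directly: each 8-byte reftable entry addresses one refcount block (a full cluster of 16-bit refcounts, the width used by this qcow2 version), and each refcount covers one data cluster. A minimal sketch of that arithmetic follows; reftable_coverage() is a hypothetical helper, not part of the patch.
/* Sketch of the arithmetic behind QCOW_MAX_REFTABLE_SIZE (not part of the
 * patch; reftable_coverage() is a hypothetical helper). */
#include <stdint.h>
#include <stdio.h>
static uint64_t reftable_coverage(uint64_t reftable_bytes, unsigned cluster_bits)
{
    uint64_t entries   = reftable_bytes / 8;          /* 8-byte reftable entries   */
    uint64_t refcounts = (1ULL << cluster_bits) / 2;  /* 16-bit refcounts per block */
    return (entries * refcounts) << cluster_bits;     /* clusters covered * size    */
}
int main(void)
{
    /* 0x800000 bytes: 2^51 = 2 PiB at 64k clusters, 2^37 = 128 GiB at 512 byte
     * clusters, 2^61 = 2 EiB at 2 MB clusters, matching the comment above. */
    printf("%llu\n", (unsigned long long)reftable_coverage(0x800000, 16));
    return 0;
}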
/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1LL << 63)
@@ -82,6 +95,32 @@ typedef struct QCowHeader {
uint32_t header_length;
} QCowHeader;
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
typedef struct QCowSnapshot {
uint64_t l1_table_offset;
uint32_t l1_size;
@@ -157,8 +196,8 @@ typedef struct BDRVQcowState {
uint64_t *refcount_table;
uint64_t refcount_table_offset;
uint32_t refcount_table_size;
int64_t free_cluster_index;
int64_t free_byte_offset;
uint64_t free_cluster_index;
uint64_t free_byte_offset;
CoMutex lock;
@@ -168,7 +207,7 @@ typedef struct BDRVQcowState {
AES_KEY aes_decrypt_key;
uint64_t snapshots_offset;
int snapshots_size;
int nb_snapshots;
unsigned int nb_snapshots;
QCowSnapshot *snapshots;
int flags;
@@ -267,7 +306,7 @@ static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}
static inline int size_to_l1(BDRVQcowState *s, int64_t size)
static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
{
int shift = s->cluster_bits + s->l2_bits;
return (size + (1ULL << shift) - 1) >> shift;
@@ -279,6 +318,11 @@ static inline int64_t align_offset(int64_t offset, int n)
return offset;
}
static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
{
return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
}
static inline int qcow2_get_cluster_type(uint64_t l2_entry)
{
if (l2_entry & QCOW_OFLAG_COMPRESSED) {
@@ -311,7 +355,7 @@ int qcow2_update_header(BlockDriverState *bs);
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
@@ -327,7 +371,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size);
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,

View File

@@ -142,6 +142,9 @@ typedef struct BDRVRawState {
bool is_xfs : 1;
#endif
bool has_discard : 1;
#ifdef CONFIG_FIEMAP
bool skip_fiemap;
#endif
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -1035,6 +1038,79 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
return result;
}
static int try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
off_t *hole, int nb_sectors, int *pnum)
{
#ifdef CONFIG_FIEMAP
BDRVRawState *s = bs->opaque;
struct {
struct fiemap fm;
struct fiemap_extent fe;
} f;
if (s->skip_fiemap) {
return 1;
}
f.fm.fm_start = start;
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
f.fm.fm_flags = FIEMAP_FLAG_SYNC;
f.fm.fm_extent_count = 1;
f.fm.fm_reserved = 0;
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
/* Assume everything is allocated. */
s->skip_fiemap = true;
return 1;
}
if (f.fm.fm_mapped_extents == 0) {
/* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
*/
off_t length = lseek(s->fd, 0, SEEK_END);
*hole = f.fm.fm_start;
*data = MIN(f.fm.fm_start + f.fm.fm_length, length);
} else {
*data = f.fe.fe_logical;
*hole = f.fe.fe_logical + f.fe.fe_length;
}
return 0;
#else
return 1;
#endif
}
static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
off_t *hole, int *pnum)
{
#if defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
*hole = lseek(s->fd, start, SEEK_HOLE);
if (*hole == -1) {
/* -ENXIO indicates that sector_num was past the end of the file.
* There is a virtual hole there. */
assert(errno != -ENXIO);
return 1;
}
if (*hole > start) {
*data = start;
} else {
/* On a hole. We need another syscall to find its end. */
*data = lseek(s->fd, start, SEEK_DATA);
if (*data == -1) {
*data = lseek(s->fd, 0, SEEK_END);
}
}
return 0;
#else
return 1;
#endif
}
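The SEEK_HOLE/SEEK_DATA path above can also be exercised outside QEMU. A minimal sketch, assuming a filesystem that supports hole detection; probe() is a hypothetical helper, not part of the patch.
/* Minimal sketch (not part of the patch): classify `offset` of an open file
 * the way try_seek_hole() does, when the filesystem supports SEEK_HOLE. */
#include <stdio.h>
#include <unistd.h>
static void probe(int fd, off_t offset)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
    off_t hole = lseek(fd, offset, SEEK_HOLE);
    if (hole == (off_t)-1) {
        perror("lseek(SEEK_HOLE)");        /* unsupported fs, or offset past EOF */
    } else if (hole > offset) {
        printf("data up to %lld, hole after that\n", (long long)hole);
    } else {
        /* offset sits inside a hole; the next data starts at SEEK_DATA */
        printf("hole, next data at %lld\n",
               (long long)lseek(fd, offset, SEEK_DATA));
    }
#else
    (void)fd; (void)offset;
#endif
}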
/*
* Returns true iff the specified sector is present in the disk image. Drivers
* not implementing the functionality are assumed to not support backing files,
@@ -1054,7 +1130,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
off_t start, data, hole;
off_t start, data = 0, hole = 0;
int ret;
ret = fd_open(bs);
@@ -1064,65 +1140,15 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
start = sector_num * BDRV_SECTOR_SIZE;
#ifdef CONFIG_FIEMAP
BDRVRawState *s = bs->opaque;
struct {
struct fiemap fm;
struct fiemap_extent fe;
} f;
f.fm.fm_start = start;
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
f.fm.fm_flags = 0;
f.fm.fm_extent_count = 1;
f.fm.fm_reserved = 0;
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
/* Assume everything is allocated. */
*pnum = nb_sectors;
return 1;
}
if (f.fm.fm_mapped_extents == 0) {
/* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
*/
off_t length = lseek(s->fd, 0, SEEK_END);
hole = f.fm.fm_start;
data = MIN(f.fm.fm_start + f.fm.fm_length, length);
} else {
data = f.fe.fe_logical;
hole = f.fe.fe_logical + f.fe.fe_length;
}
#elif defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
hole = lseek(s->fd, start, SEEK_HOLE);
if (hole == -1) {
/* -ENXIO indicates that sector_num was past the end of the file.
* There is a virtual hole there. */
assert(errno != -ENXIO);
/* Most likely EINVAL. Assume everything is allocated. */
*pnum = nb_sectors;
return 1;
}
if (hole > start) {
data = start;
} else {
/* On a hole. We need another syscall to find its end. */
data = lseek(s->fd, start, SEEK_DATA);
if (data == -1) {
data = lseek(s->fd, 0, SEEK_END);
ret = try_seek_hole(bs, start, &data, &hole, pnum);
if (ret) {
ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
if (ret) {
/* Assume everything is allocated. */
data = 0;
hole = start + nb_sectors * BDRV_SECTOR_SIZE;
}
}
#else
*pnum = nb_sectors;
return 1;
#endif
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */

View File

@@ -63,7 +63,8 @@
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
RBD_AIO_DISCARD
RBD_AIO_DISCARD,
RBD_AIO_FLUSH
} RBDAIOCmd;
typedef struct RBDAIOCB {
@@ -379,8 +380,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
r = rcb->ret;
if (acb->cmd == RBD_AIO_WRITE ||
acb->cmd == RBD_AIO_DISCARD) {
if (acb->cmd != RBD_AIO_READ) {
if (r < 0) {
acb->ret = r;
acb->error = 1;
@@ -658,6 +658,16 @@ static int rbd_aio_discard_wrapper(rbd_image_t image,
#endif
}
static int rbd_aio_flush_wrapper(rbd_image_t image,
rbd_completion_t comp)
{
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
return rbd_aio_flush(image, comp);
#else
return -ENOTSUP;
#endif
}
static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -678,7 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
acb->cmd = cmd;
acb->qiov = qiov;
if (cmd == RBD_AIO_DISCARD) {
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_blockalign(bs, qiov->size);
@@ -722,6 +732,9 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
break;
case RBD_AIO_FLUSH:
r = rbd_aio_flush_wrapper(s->image, c);
break;
default:
r = -EINVAL;
}
@@ -761,6 +774,16 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
RBD_AIO_WRITE);
}
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
}
#else
static int qemu_rbd_co_flush(BlockDriverState *bs)
{
#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
@@ -771,6 +794,7 @@ static int qemu_rbd_co_flush(BlockDriverState *bs)
return 0;
#endif
}
#endif
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
@@ -948,7 +972,12 @@ static BlockDriver bdrv_rbd = {
.bdrv_aio_readv = qemu_rbd_aio_readv,
.bdrv_aio_writev = qemu_rbd_aio_writev,
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
.bdrv_aio_flush = qemu_rbd_aio_flush,
#else
.bdrv_co_flush_to_disk = qemu_rbd_co_flush,
#endif
#ifdef LIBRBD_SUPPORTS_DISCARD
.bdrv_aio_discard = qemu_rbd_aio_discard,

View File

@@ -549,7 +549,7 @@ static coroutine_fn void do_co_req(void *opaque)
co = qemu_coroutine_self();
qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, NULL, co);
socket_set_block(sockfd);
qemu_set_block(sockfd);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
@@ -579,7 +579,7 @@ static coroutine_fn void do_co_req(void *opaque)
ret = 0;
out:
qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL);
socket_set_nonblock(sockfd);
qemu_set_nonblock(sockfd);
srco->ret = ret;
srco->finished = true;
@@ -812,7 +812,7 @@ static int get_sheep_fd(BDRVSheepdogState *s)
return fd;
}
socket_set_nonblock(fd);
qemu_set_nonblock(fd);
ret = set_nodelay(fd);
if (ret) {

block/tar.c Normal file
View File

@@ -0,0 +1,365 @@
/*
* Tar block driver
*
* Copyright (c) 2009 Alexander Graf <agraf@suse.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
// #define DEBUG
#ifdef DEBUG
#define dprintf(fmt, ...) do { printf("tar: " fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) do { } while (0)
#endif
#define SECTOR_SIZE 512
#define POSIX_TAR_MAGIC "ustar"
#define OFFS_LENGTH 0x7c
#define OFFS_TYPE 0x9c
#define OFFS_MAGIC 0x101
#define OFFS_S_SP 0x182
#define OFFS_S_EXT 0x1e2
#define OFFS_S_LENGTH 0x1e3
#define OFFS_SX_EXT 0x1f8
typedef struct SparseCache {
uint64_t start;
uint64_t end;
} SparseCache;
typedef struct BDRVTarState {
BlockDriverState *hd;
size_t file_sec;
uint64_t file_len;
SparseCache *sparse;
int sparse_num;
uint64_t last_end;
char longfile[2048];
} BDRVTarState;
static int tar_probe(const uint8_t *buf, int buf_size, const char *filename)
{
if (buf_size < OFFS_MAGIC + 5)
return 0;
/* we only support newer tar */
if (!strncmp((char*)buf + OFFS_MAGIC, POSIX_TAR_MAGIC, 5))
return 100;
return 0;
}
static int str_ends(char *str, const char *end)
{
int end_len = strlen(end);
int str_len = strlen(str);
if (str_len < end_len)
return 0;
return !strncmp(str + str_len - end_len, end, end_len);
}
static int is_target_file(BlockDriverState *bs, char *filename,
char *header)
{
int retval = 0;
if (str_ends(filename, ".raw"))
retval = 1;
if (str_ends(filename, ".qcow"))
retval = 1;
if (str_ends(filename, ".qcow2"))
retval = 1;
if (str_ends(filename, ".vmdk"))
retval = 1;
if (retval &&
(header[OFFS_TYPE] != '0') &&
(header[OFFS_TYPE] != 'S')) {
retval = 0;
}
dprintf("does filename %s match? %s\n", filename, retval ? "yes" : "no");
/* make sure we're not using this name again */
filename[0] = '\0';
return retval;
}
static uint64_t tar2u64(char *ptr)
{
uint64_t retval;
char oldend = ptr[12];
ptr[12] = '\0';
if (*ptr & 0x80) {
/* XXX we only support files up to 64 bit length */
retval = be64_to_cpu(*(uint64_t *)(ptr+4));
dprintf("Convert %lx -> %#lx\n", *(uint64_t*)(ptr+4), retval);
} else {
retval = strtol(ptr, NULL, 8);
dprintf("Convert %s -> %#lx\n", ptr, retval);
}
ptr[12] = oldend;
return retval;
}
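For reference, the 12-byte numeric fields that tar2u64() decodes are either NUL/space terminated octal ASCII or, when the top bit of the first byte is set, the GNU base-256 big-endian binary encoding. A standalone sketch of the same decoding, with a hypothetical helper name:
/* Sketch (not from the patch): decode a 12-byte ustar numeric field such as
 * the size field at offset 0x7c, mirroring what tar2u64() above does. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
static uint64_t tar_size_field(const unsigned char *field /* 12 bytes */)
{
    if (field[0] & 0x80) {                 /* GNU base-256: big-endian binary */
        uint64_t v = 0;
        for (int i = 4; i < 12; i++) {     /* low 8 bytes, like be64_to_cpu(ptr + 4) */
            v = (v << 8) | field[i];
        }
        return v;
    }
    /* otherwise NUL/space terminated octal ASCII */
    char buf[13];
    memcpy(buf, field, 12);
    buf[12] = '\0';
    return strtoull(buf, NULL, 8);
}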
static void tar_sparse(BDRVTarState *s, uint64_t offs, uint64_t len)
{
SparseCache *sparse;
if (!len)
return;
if (!(offs - s->last_end)) {
s->last_end += len;
return;
}
if (s->last_end > offs)
return;
dprintf("Last chunk until %lx new chunk at %lx\n", s->last_end, offs);
s->sparse = g_realloc(s->sparse, (s->sparse_num + 1) * sizeof(SparseCache));
sparse = &s->sparse[s->sparse_num];
sparse->start = s->last_end;
sparse->end = offs;
s->last_end = offs + len;
s->sparse_num++;
dprintf("Sparse at %lx end=%lx\n", sparse->start,
sparse->end);
}
static int tar_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVTarState *s = bs->opaque;
char header[SECTOR_SIZE];
char *real_file = header;
char *magic;
const char *fname = filename;
size_t header_offs = 0;
int ret;
if (!strncmp(filename, "tar://", 6))
fname += 6;
else if (!strncmp(filename, "tar:", 4))
fname += 4;
ret = bdrv_file_open(&s->hd, fname, flags);
if (ret < 0)
return ret;
/* Search the file for an image */
do {
/* tar header */
if (bdrv_pread(s->hd, header_offs, header, SECTOR_SIZE) != SECTOR_SIZE)
goto fail;
if ((header_offs > 1) && !header[0]) {
fprintf(stderr, "Tar: No image file found in archive\n");
goto fail;
}
magic = &header[OFFS_MAGIC];
if (strncmp(magic, POSIX_TAR_MAGIC, 5)) {
fprintf(stderr, "Tar: Invalid magic: %s\n", magic);
goto fail;
}
dprintf("file type: %c\n", header[OFFS_TYPE]);
/* file length*/
s->file_len = (tar2u64(&header[OFFS_LENGTH]) + (SECTOR_SIZE - 1)) &
~(SECTOR_SIZE - 1);
s->file_sec = (header_offs / SECTOR_SIZE) + 1;
header_offs += s->file_len + SECTOR_SIZE;
if (header[OFFS_TYPE] == 'L') {
bdrv_pread(s->hd, header_offs - s->file_len, s->longfile,
sizeof(s->longfile));
s->longfile[sizeof(s->longfile)-1] = '\0';
real_file = header;
} else if (s->longfile[0]) {
real_file = s->longfile;
} else {
real_file = header;
}
} while(!is_target_file(bs, real_file, header));
/* We found an image! */
if (header[OFFS_TYPE] == 'S') {
uint8_t isextended;
int i;
for (i = OFFS_S_SP; i < (OFFS_S_SP + (4 * 24)); i += 24)
tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
s->file_len = tar2u64(&header[OFFS_S_LENGTH]);
isextended = header[OFFS_S_EXT];
while (isextended) {
if (bdrv_pread(s->hd, s->file_sec * SECTOR_SIZE, header,
SECTOR_SIZE) != SECTOR_SIZE)
goto fail;
for (i = 0; i < (21 * 24); i += 24)
tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
isextended = header[OFFS_SX_EXT];
s->file_sec++;
}
tar_sparse(s, s->file_len, 1);
}
return 0;
fail:
fprintf(stderr, "Tar: Error opening file\n");
bdrv_delete(s->hd);
return -EINVAL;
}
typedef struct TarAIOCB {
BlockDriverAIOCB common;
QEMUBH *bh;
} TarAIOCB;
/* This callback gets invoked when we have pure sparseness */
static void tar_sparse_cb(void *opaque)
{
TarAIOCB *acb = (TarAIOCB *)opaque;
acb->common.cb(acb->common.opaque, 0);
qemu_bh_delete(acb->bh);
qemu_aio_release(acb);
}
static void tar_aio_cancel(BlockDriverAIOCB *blockacb)
{
}
static AIOCBInfo tar_aiocb_info = {
.aiocb_size = sizeof(TarAIOCB),
.cancel = tar_aio_cancel,
};
/* This is where we get a request from a caller to read something */
static BlockDriverAIOCB *tar_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVTarState *s = bs->opaque;
SparseCache *sparse;
int64_t sec_file = sector_num + s->file_sec;
int64_t start = sector_num * SECTOR_SIZE;
int64_t end = start + (nb_sectors * SECTOR_SIZE);
int i;
TarAIOCB *acb;
for (i = 0; i < s->sparse_num; i++) {
sparse = &s->sparse[i];
if (sparse->start > end) {
/* We expect the cache to be start increasing */
break;
} else if ((sparse->start < start) && (sparse->end <= start)) {
/* sparse before our offset */
sec_file -= (sparse->end - sparse->start) / SECTOR_SIZE;
} else if ((sparse->start <= start) && (sparse->end >= end)) {
/* all our sectors are sparse */
char *buf = g_malloc0(nb_sectors * SECTOR_SIZE);
acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
g_free(buf);
acb->bh = qemu_bh_new(tar_sparse_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
} else if (((sparse->start >= start) && (sparse->start < end)) ||
((sparse->end >= start) && (sparse->end < end))) {
/* we're semi-sparse (worst case) */
/* let's go synchronous and read all sectors individually */
char *buf = g_malloc(nb_sectors * SECTOR_SIZE);
uint64_t offs;
for (offs = 0; offs < (nb_sectors * SECTOR_SIZE);
offs += SECTOR_SIZE) {
bdrv_pread(bs, (sector_num * SECTOR_SIZE) + offs,
buf + offs, SECTOR_SIZE);
}
qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
acb->bh = qemu_bh_new(tar_sparse_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
}
}
return bdrv_aio_readv(s->hd, sec_file, qiov, nb_sectors,
cb, opaque);
}
static void tar_close(BlockDriverState *bs)
{
dprintf("Close\n");
}
static int64_t tar_getlength(BlockDriverState *bs)
{
BDRVTarState *s = bs->opaque;
dprintf("getlength -> %ld\n", s->file_len);
return s->file_len;
}
static BlockDriver bdrv_tar = {
.format_name = "tar",
.protocol_name = "tar",
.instance_size = sizeof(BDRVTarState),
.bdrv_file_open = tar_open,
.bdrv_close = tar_close,
.bdrv_getlength = tar_getlength,
.bdrv_probe = tar_probe,
.bdrv_aio_readv = tar_aio_readv,
};
static void tar_block_init(void)
{
bdrv_register(&bdrv_tar);
}
block_init(tar_block_init);

View File

@@ -120,6 +120,11 @@ typedef unsigned char uuid_t[16];
#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
/* max blocks in image is (0xffffffff / 4) */
#define VDI_BLOCKS_IN_IMAGE_MAX 0x3fffffff
#define VDI_DISK_SIZE_MAX ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
(uint64_t)DEFAULT_CLUSTER_SIZE)
#if !defined(CONFIG_UUID)
static inline void uuid_generate(uuid_t out)
{
@@ -383,6 +388,14 @@ static int vdi_open(BlockDriverState *bs, int flags)
vdi_header_print(&header);
#endif
if (header.disk_size > VDI_DISK_SIZE_MAX) {
logout("Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")\n",
header.disk_size, VDI_DISK_SIZE_MAX);
ret = -ENOTSUP;
goto fail;
}
if (header.disk_size % SECTOR_SIZE != 0) {
/* 'VBoxManage convertfromraw' can create images with odd disk sizes.
We accept them but round the disk size to the next multiple of
@@ -415,8 +428,9 @@ static int vdi_open(BlockDriverState *bs, int flags)
logout("unsupported sector size %u B\n", header.sector_size);
ret = -ENOTSUP;
goto fail;
} else if (header.block_size != 1 * MiB) {
logout("unsupported block size %u B\n", header.block_size);
} else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
logout("unsupported VDI image (block size %u is not %u)\n",
header.block_size, DEFAULT_CLUSTER_SIZE);
ret = -ENOTSUP;
goto fail;
} else if (header.disk_size >
@@ -432,6 +446,11 @@ static int vdi_open(BlockDriverState *bs, int flags)
logout("parent uuid != 0, unsupported\n");
ret = -ENOTSUP;
goto fail;
} else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) {
logout("unsupported VDI image (too many blocks %u, max is %u)\n",
header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX);
ret = -ENOTSUP;
goto fail;
}
bs->total_sectors = header.disk_size / SECTOR_SIZE;
@@ -668,11 +687,20 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options)
options++;
}
if (bytes > VDI_DISK_SIZE_MAX) {
result = -ENOTSUP;
logout("Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")\n",
bytes, VDI_DISK_SIZE_MAX);
goto exit;
}
fd = qemu_open(filename,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
0644);
if (fd < 0) {
return -errno;
result = -errno;
goto exit;
}
/* We need enough blocks to store the given disk size,
@@ -733,6 +761,7 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options)
result = -errno;
}
exit:
return result;
}

View File

@@ -45,6 +45,8 @@ enum vhd_type {
// Seconds since Jan 1, 2000 0:00:00 (UTC)
#define VHD_TIMESTAMP_BASE 946684800
#define VHD_MAX_SECTORS (65535LL * 255 * 255)
// always big-endian
struct vhd_footer {
char creator[8]; // "conectix"
@@ -163,6 +165,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
struct vhd_dyndisk_header* dyndisk_header;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
int disk_type = VHD_DYNAMIC;
int ret;
@@ -211,7 +214,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
/* Allow a maximum disk size of approximately 2 TB */
if (bs->total_sectors >= 65535LL * 255 * 255) {
if (bs->total_sectors >= VHD_MAX_SECTORS) {
ret = -EFBIG;
goto fail;
}
@@ -231,10 +234,32 @@ static int vpc_open(BlockDriverState *bs, int flags)
}
s->block_size = be32_to_cpu(dyndisk_header->block_size);
if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Invalid block size %" PRIu32, s->block_size);
ret = -EINVAL;
goto fail;
}
s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
s->pagetable = g_malloc(s->max_table_entries * 4);
if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
ret = -EINVAL;
goto fail;
}
if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
ret = -EINVAL;
goto fail;
}
computed_size = (uint64_t) s->max_table_entries * s->block_size;
if (computed_size < bs->total_sectors * 512) {
ret = -EINVAL;
goto fail;
}
s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
@@ -280,7 +305,7 @@ static int vpc_open(BlockDriverState *bs, int flags)
return 0;
fail:
g_free(s->pagetable);
qemu_vfree(s->pagetable);
#ifdef CACHE
g_free(s->pageentry_u8);
#endif
@@ -789,7 +814,7 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
static void vpc_close(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
g_free(s->pagetable);
qemu_vfree(s->pagetable);
#ifdef CACHE
g_free(s->pageentry_u8);
#endif

View File

@@ -570,7 +570,7 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
/* add virtio block device */
opts = qemu_opts_create_nofail(qemu_find_opts("device"));
if (arch_type == QEMU_ARCH_S390X) {
qemu_opt_set(opts, "driver", "virtio-blk-s390");
qemu_opt_set(opts, "driver", "virtio-blk-ccw");
} else {
qemu_opt_set(opts, "driver", "virtio-blk-pci");
}
@@ -1043,6 +1043,9 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp)
return;
}
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
switch (bdrv_truncate(bs, size)) {
case 0:
break;

configure vendored
View File

@@ -283,7 +283,7 @@ sdl_config="${SDL_CONFIG-${cross_prefix}sdl-config}"
# default flags for all hosts
QEMU_CFLAGS="-fno-strict-aliasing $QEMU_CFLAGS"
QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
QEMU_CFLAGS="-Wstrict-prototypes $QEMU_CFLAGS"
QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS"
QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/include"
if test "$debug_info" = "yes"; then
@@ -1435,6 +1435,7 @@ fi
if test "$seccomp" != "no" ; then
if $pkg_config --atleast-version=1.0.0 libseccomp --modversion >/dev/null 2>&1; then
libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`"
QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`"
seccomp="yes"
else
if test "$seccomp" = "yes"; then
@@ -2759,7 +2760,13 @@ if test "$libiscsi" != "no" ; then
#include <iscsi/iscsi.h>
int main(void) { iscsi_unmap_sync(NULL,0,0,0,NULL,0); return 0; }
EOF
if compile_prog "" "-liscsi" ; then
if $pkg_config --atleast-version=1.7.0 libiscsi --modversion >/dev/null 2>&1; then
libiscsi="yes"
libiscsi_cflags=$($pkg_config --cflags libiscsi 2>/dev/null)
libiscsi_libs=$($pkg_config --libs libiscsi 2>/dev/null)
CFLAGS="$CFLAGS $libiscsi_cflags"
LIBS="$LIBS $libiscsi_libs"
elif compile_prog "" "-liscsi" ; then
libiscsi="yes"
LIBS="$LIBS -liscsi"
else
@@ -2827,7 +2834,7 @@ EOF
spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null)
spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null)
if $pkg_config --atleast-version=0.12.0 spice-server >/dev/null 2>&1 && \
$pkg_config --atleast-version=0.12.2 spice-protocol > /dev/null 2>&1 && \
$pkg_config --atleast-version=0.12.3 spice-protocol > /dev/null 2>&1 && \
compile_prog "$spice_cflags" "$spice_libs" ; then
spice="yes"
libs_softmmu="$libs_softmmu $spice_libs"
@@ -3029,34 +3036,67 @@ fi
##########################################
# check and set a backend for coroutine
# default is ucontext, but always fallback to gthread
# windows autodetected by make
if test "$coroutine" = "" -o "$coroutine" = "ucontext"; then
if test "$darwin" != "yes"; then
cat > $TMPC << EOF
# We prefer ucontext, but it's not always possible. The fallback
# is sigcontext. gthread is not selectable except explicitly, because
# it is not functional enough to run QEMU proper. (It is occasionally
# useful for debugging purposes.) On Windows the only valid backend
# is the Windows-specific one.
ucontext_works=no
if test "$darwin" != "yes"; then
cat > $TMPC << EOF
#include <ucontext.h>
#ifdef __stub_makecontext
#error Ignoring glibc stub makecontext which will always fail
#endif
int main(void) { makecontext(0, 0, 0); return 0; }
EOF
if compile_prog "" "" ; then
coroutine_backend=ucontext
else
coroutine_backend=gthread
fi
if compile_prog "" "" ; then
ucontext_works=yes
fi
fi
if test "$coroutine" = ""; then
if test "$mingw32" = "yes"; then
coroutine=win32
elif test "$ucontext_works" = "yes"; then
coroutine=ucontext
else
coroutine=sigaltstack
fi
elif test "$coroutine" = "gthread" ; then
coroutine_backend=gthread
elif test "$coroutine" = "windows" ; then
coroutine_backend=windows
elif test "$coroutine" = "sigaltstack" ; then
coroutine_backend=sigaltstack
else
echo
echo "Error: unknown coroutine backend $coroutine"
echo
exit 1
case $coroutine in
windows)
if test "$mingw32" != "yes"; then
echo
echo "Error: 'windows' coroutine backend only valid for Windows"
echo
exit 1
fi
# Unfortunately the user visible backend name doesn't match the
# coroutine-*.c filename for this case, so we have to adjust it here.
coroutine=win32
;;
ucontext)
if test "$ucontext_works" != "yes"; then
feature_not_found "ucontext"
fi
;;
gthread|sigaltstack)
if test "$mingw32" = "yes"; then
echo
echo "Error: only the 'windows' coroutine backend is valid for Windows"
echo
exit 1
fi
;;
*)
echo
echo "Error: unknown coroutine backend $coroutine"
echo
exit 1
;;
esac
fi
##########################################
@@ -3339,7 +3379,7 @@ echo "OpenGL support $opengl"
echo "libiscsi support $libiscsi"
echo "build guest agent $guest_agent"
echo "seccomp support $seccomp"
echo "coroutine backend $coroutine_backend"
echo "coroutine backend $coroutine"
echo "GlusterFS support $glusterfs"
echo "virtio-blk-data-plane $virtio_blk_data_plane"
echo "gcov $gcov_tool"
@@ -3662,11 +3702,7 @@ if test "$rbd" = "yes" ; then
echo "CONFIG_RBD=y" >> $config_host_mak
fi
if test "$coroutine_backend" = "ucontext" ; then
echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak
elif test "$coroutine_backend" = "sigaltstack" ; then
echo "CONFIG_SIGALTSTACK_COROUTINE=y" >> $config_host_mak
fi
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
if test "$open_by_handle_at" = "yes" ; then
echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak

View File

@@ -51,12 +51,32 @@ void cpu_resume_from_signal(CPUArchState *env, void *puc)
}
#endif
/* Execute a TB, and fix up the CPU state afterwards if necessary */
static inline tcg_target_ulong cpu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
{
tcg_target_ulong next_tb = tcg_qemu_tb_exec(env, tb_ptr);
if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
* of the start of the TB.
*/
TranslationBlock *tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
cpu_pc_from_tb(env, tb);
}
if ((next_tb & TB_EXIT_MASK) == TB_EXIT_REQUESTED) {
/* We were asked to stop executing TBs (probably a pending
* interrupt. We've now stopped, so clear the flag.
*/
env->tcg_exit_req = 0;
}
return next_tb;
}
/* Execute the code without caching the generated code. An interpreter
could be used if available. */
static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
TranslationBlock *orig_tb)
{
tcg_target_ulong next_tb;
TranslationBlock *tb;
/* Should never happen.
@@ -68,14 +88,8 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
max_cycles);
env->current_tb = tb;
/* execute the generated code */
next_tb = tcg_qemu_tb_exec(env, tb->tc_ptr);
cpu_tb_exec(env, tb->tc_ptr);
env->current_tb = NULL;
if ((next_tb & 3) == 2) {
/* Restore PC. This may happen if async event occurs before
the TB starts executing. */
cpu_pc_from_tb(env, tb);
}
tb_phys_invalidate(tb, -1);
tb_free(tb);
}
@@ -583,7 +597,8 @@ int cpu_exec(CPUArchState *env)
spans two pages, we cannot safely do a direct
jump. */
if (next_tb != 0 && tb->page_addr[1] == -1) {
tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK, tb);
}
spin_unlock(&tb_lock);
@@ -596,13 +611,24 @@ int cpu_exec(CPUArchState *env)
if (likely(!env->exit_request)) {
tc_ptr = tb->tc_ptr;
/* execute the generated code */
next_tb = tcg_qemu_tb_exec(env, tc_ptr);
if ((next_tb & 3) == 2) {
next_tb = cpu_tb_exec(env, tc_ptr);
switch (next_tb & TB_EXIT_MASK) {
case TB_EXIT_REQUESTED:
/* Something asked us to stop executing
* chained TBs; just continue round the main
* loop. Whatever requested the exit will also
* have set something else (eg exit_request or
* interrupt_request) which we will handle
* next time around the loop.
*/
tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
next_tb = 0;
break;
case TB_EXIT_ICOUNT_EXPIRED:
{
/* Instruction counter expired. */
int insns_left;
tb = (TranslationBlock *)(next_tb & ~3);
/* Restore PC. */
cpu_pc_from_tb(env, tb);
tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
insns_left = env->icount_decr.u32;
if (env->icount_extra && insns_left >= 0) {
/* Refill decrementer and continue execution. */
@@ -623,6 +649,10 @@ int cpu_exec(CPUArchState *env)
next_tb = 0;
cpu_loop_exit(env);
}
break;
}
default:
break;
}
}
env->current_tb = NULL;

View File

@@ -157,7 +157,6 @@ static const VMStateDescription vmstate_kbd = {
.name = "pckbd",
.version_id = 3,
.minimum_version_id = 3,
.minimum_version_id_old = 3,
.fields = (VMStateField []) {
VMSTATE_UINT8(write_cmd, KBDState),
VMSTATE_UINT8(status, KBDState),
@@ -186,12 +185,13 @@ You can see that there are several version fields:
- minimum_version_id: the minimum version_id that VMState is able to understand
for that device.
- minimum_version_id_old: For devices that were not able to port to vmstate, we can
assign a function that knows how to read this old state.
assign a function that knows how to read this old state. This field is
ignored if there is no load_state_old handler.
So, VMState is able to read versions from minimum_version_id to
version_id. And the function load_state_old() is able to load state
from minimum_version_id_old to minimum_version_id. This function is
deprecated and will be removed when no more users are left.
version_id. And the function load_state_old() (if present) is able to
load state from minimum_version_id_old to minimum_version_id. This
function is deprecated and will be removed when no more users are left.
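For concreteness, a device that still needs the legacy loader would declare something like the following; this is a sketch only, and foo_load_old, FooState and the version numbers are made up:
/* Sketch: versions 2..3 are handled by the vmstate fields below, version 1
 * by the legacy loader (foo_load_old and FooState are hypothetical). */
static const VMStateDescription vmstate_foo = {
    .name = "foo",
    .version_id = 3,
    .minimum_version_id = 2,      /* vmstate itself loads versions 2..3 */
    .minimum_version_id_old = 1,  /* foo_load_old() handles version 1   */
    .load_state_old = foo_load_old,
    .fields = (VMStateField []) {
        VMSTATE_UINT8(state, FooState),
        VMSTATE_END_OF_LIST()
    }
};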
=== Massaging functions ===
@@ -272,7 +272,6 @@ const VMStateDescription vmstate_ide_drive_pio_state = {
.name = "ide_drive/pio_state",
.version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.pre_save = ide_drive_pio_pre_save,
.post_load = ide_drive_pio_post_load,
.fields = (VMStateField []) {
@@ -292,7 +291,6 @@ const VMStateDescription vmstate_ide_drive = {
.name = "ide_drive",
.version_id = 3,
.minimum_version_id = 0,
.minimum_version_id_old = 0,
.post_load = ide_drive_post_load,
.fields = (VMStateField []) {
.... several fields ....

exec.c
View File

@@ -493,7 +493,7 @@ void cpu_reset_interrupt(CPUArchState *env, int mask)
void cpu_exit(CPUArchState *env)
{
env->exit_request = 1;
cpu_unlink_tb(env);
env->tcg_exit_req = 1;
}
void cpu_abort(CPUArchState *env, const char *fmt, ...)
@@ -1080,6 +1080,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
qemu_ram_setup_dump(new_block->host, size);
qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
if (kvm_enabled())
kvm_setup_guest_memory(new_block->host, size);
@@ -1164,7 +1165,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
offset = addr - block->offset;
if (offset < block->length) {
vaddr = block->host + offset;
vaddr = ramblock_ptr(block, offset);
if (block->flags & RAM_PREALLOC_MASK) {
;
} else {
@@ -1255,7 +1256,7 @@ found:
xen_map_cache(block->offset, block->length, 1);
}
}
return block->host + (addr - block->offset);
return ramblock_ptr(block, addr - block->offset);
}
/* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
@@ -1282,7 +1283,7 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
xen_map_cache(block->offset, block->length, 1);
}
}
return block->host + (addr - block->offset);
return ramblock_ptr(block, addr - block->offset);
}
}
@@ -1308,7 +1309,7 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
if (addr - block->offset < block->length) {
if (addr - block->offset + *size > block->length)
*size = block->length - addr + block->offset;
return block->host + (addr - block->offset);
return ramblock_ptr(block, addr - block->offset);
}
}

View File

@@ -9,6 +9,10 @@
* the COPYING file in the top-level directory.
*/
/* work around a broken sys/capability.h */
#if defined(__i386__)
typedef unsigned long long __u64;
#endif
#include <sys/resource.h>
#include <getopt.h>
#include <syslog.h>

hmp.c
View File

@@ -173,6 +173,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->ram->total >> 10);
monitor_printf(mon, "duplicate: %" PRIu64 " pages\n",
info->ram->duplicate);
monitor_printf(mon, "skipped: %" PRIu64 " pages\n",
info->ram->skipped);
monitor_printf(mon, "normal: %" PRIu64 " pages\n",
info->ram->normal);
monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",

View File

@@ -284,7 +284,7 @@ static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path,
if ((fs_ctx->export_flags & V9FS_SM_MAPPED) ||
(fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
int fd;
fd = open(rpath(fs_ctx, path, buffer), O_RDONLY);
fd = open(rpath(fs_ctx, path, buffer), O_RDONLY | O_NOFOLLOW);
if (fd == -1) {
return -1;
}

View File

@@ -659,7 +659,7 @@ static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
ret |= S_IFIFO;
}
if (mode & P9_STAT_MODE_DEVICE) {
if (extension && extension->data[0] == 'c') {
if (extension->size && extension->data[0] == 'c') {
ret |= S_IFCHR;
} else {
ret |= S_IFBLK;

View File

@@ -472,8 +472,9 @@ static const MemoryRegionOps acpi_pm_cnt_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent)
void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val)
{
ar->pm1.cnt.s4_val = s4_val;
ar->wakeup.notify = acpi_notify_wakeup;
qemu_register_wakeup_notifier(&ar->wakeup);
memory_region_init_io(&ar->pm1.cnt.io, &acpi_pm_cnt_ops, ar, "acpi-cnt", 2);

View File

@@ -142,7 +142,7 @@ void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn update_sci,
MemoryRegion *parent);
/* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent);
void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val);
void acpi_pm1_cnt_update(ACPIREGS *ar,
bool sci_enable, bool sci_disable);
void acpi_pm1_cnt_reset(ACPIREGS *ar);

View File

@@ -212,7 +212,7 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io);
acpi_pm1_evt_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io);
acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io);
acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, 2);
acpi_gpe_init(&pm->acpi_regs, ICH9_PMIO_GPE0_LEN);
memory_region_init_io(&pm->io_gpe, &ich9_gpe_ops, pm, "apci-gpe0",

View File

@@ -266,7 +266,7 @@ static int acpi_load_old(QEMUFile *f, void *opaque, int version_id)
static const VMStateDescription vmstate_acpi = {
.name = "piix4_pm",
.version_id = 3,
.minimum_version_id = 3,
.minimum_version_id = 2, /* qemu-kvm */
.minimum_version_id_old = 1,
.load_state_old = acpi_load_old,
.post_load = vmstate_acpi_post_load,
@@ -418,7 +418,7 @@ static int piix4_pm_initfn(PCIDevice *dev)
acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_cnt_init(&s->ar, &s->io);
acpi_pm1_cnt_init(&s->ar, &s->io, s->s4_val);
acpi_gpe_init(&s->ar, GPE_LEN);
s->powerdown_notifier.notify = piix4_pm_powerdown_req;

View File

@@ -172,20 +172,6 @@
#define CIRRUS_PNPMMIO_SIZE 0x1000
#define BLTUNSAFE(s) \
( \
( /* check dst is within bounds */ \
(s)->cirrus_blt_height * ABS((s)->cirrus_blt_dstpitch) \
+ ((s)->cirrus_blt_dstaddr & (s)->cirrus_addr_mask) > \
(s)->vga.vram_size \
) || \
( /* check src is within bounds */ \
(s)->cirrus_blt_height * ABS((s)->cirrus_blt_srcpitch) \
+ ((s)->cirrus_blt_srcaddr & (s)->cirrus_addr_mask) > \
(s)->vga.vram_size \
) \
)
struct CirrusVGAState;
typedef void (*cirrus_bitblt_rop_t) (struct CirrusVGAState *s,
uint8_t * dst, const uint8_t * src,
@@ -273,6 +259,50 @@ static void cirrus_update_memory_access(CirrusVGAState *s);
*
***************************************/
static bool blit_region_is_unsafe(struct CirrusVGAState *s,
int32_t pitch, int32_t addr)
{
if (pitch < 0) {
int64_t min = addr
+ ((int64_t)s->cirrus_blt_height-1) * pitch;
int32_t max = addr
+ s->cirrus_blt_width;
if (min < 0 || max >= s->vga.vram_size) {
return true;
}
} else {
int64_t max = addr
+ ((int64_t)s->cirrus_blt_height-1) * pitch
+ s->cirrus_blt_width;
if (max >= s->vga.vram_size) {
return true;
}
}
return false;
}
static bool blit_is_unsafe(struct CirrusVGAState *s)
{
/* should be the case, see cirrus_bitblt_start */
assert(s->cirrus_blt_width > 0);
assert(s->cirrus_blt_height > 0);
if (s->cirrus_blt_width > CIRRUS_BLTBUFSIZE) {
return true;
}
if (blit_region_is_unsafe(s, s->cirrus_blt_dstpitch,
s->cirrus_blt_dstaddr & s->cirrus_addr_mask)) {
return true;
}
if (blit_region_is_unsafe(s, s->cirrus_blt_srcpitch,
s->cirrus_blt_srcaddr & s->cirrus_addr_mask)) {
return true;
}
return false;
}
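A worked example of the negative-pitch case may help; the register values below are made up, and the snippet simply replays the min/max computation from blit_region_is_unsafe():
/* Worked example (not part of the patch) of the negative-pitch bound check. */
#include <stdint.h>
#include <stdio.h>
int main(void)
{
    int32_t pitch  = -1024;            /* bytes per line, walking downwards */
    int32_t addr   = 4096;             /* start address within video RAM    */
    int32_t height = 8, width = 64;
    uint32_t vram_size = 4 * 1024 * 1024;
    int64_t min = addr + (int64_t)(height - 1) * pitch;   /* 4096 - 7168 = -3072 */
    int64_t max = addr + width;                           /* 4160               */
    printf("unsafe: %d\n", min < 0 || max >= vram_size);  /* 1: blit rejected   */
    return 0;
}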
static void cirrus_bitblt_rop_nop(CirrusVGAState *s,
uint8_t *dst,const uint8_t *src,
int dstpitch,int srcpitch,
@@ -630,8 +660,9 @@ static int cirrus_bitblt_common_patterncopy(CirrusVGAState * s,
dst = s->vga.vram_ptr + (s->cirrus_blt_dstaddr & s->cirrus_addr_mask);
if (BLTUNSAFE(s))
if (blit_is_unsafe(s)) {
return 0;
}
(*s->cirrus_rop) (s, dst, src,
s->cirrus_blt_dstpitch, 0,
@@ -648,8 +679,9 @@ static int cirrus_bitblt_solidfill(CirrusVGAState *s, int blt_rop)
{
cirrus_fill_t rop_func;
if (BLTUNSAFE(s))
if (blit_is_unsafe(s)) {
return 0;
}
rop_func = cirrus_fill[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
rop_func(s, s->vga.vram_ptr + (s->cirrus_blt_dstaddr & s->cirrus_addr_mask),
s->cirrus_blt_dstpitch,
@@ -745,8 +777,9 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s)
{
if (BLTUNSAFE(s))
if (blit_is_unsafe(s)) {
return 0;
}
cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr,
s->cirrus_blt_srcaddr - s->vga.start_addr,

View File

@@ -1432,7 +1432,7 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
{
FDrive *cur_drv;
uint32_t retval = 0;
int pos;
uint32_t pos;
cur_drv = get_cur_drv(fdctrl);
fdctrl->dsr &= ~FD_DSR_PWRDOWN;
@@ -1441,8 +1441,8 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
return 0;
}
pos = fdctrl->data_pos;
pos %= FD_SECTOR_LEN;
if (fdctrl->msr & FD_MSR_NONDMA) {
pos %= FD_SECTOR_LEN;
if (pos == 0) {
if (fdctrl->data_pos != 0)
if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
@@ -1786,10 +1786,13 @@ static void fdctrl_handle_option(FDCtrl *fdctrl, int direction)
static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direction)
{
FDrive *cur_drv = get_cur_drv(fdctrl);
uint32_t pos;
if (fdctrl->fifo[fdctrl->data_pos - 1] & 0x80) {
pos = fdctrl->data_pos - 1;
pos %= FD_SECTOR_LEN;
if (fdctrl->fifo[pos] & 0x80) {
/* Command parameters done */
if (fdctrl->fifo[fdctrl->data_pos - 1] & 0x40) {
if (fdctrl->fifo[pos] & 0x40) {
fdctrl->fifo[0] = fdctrl->fifo[1];
fdctrl->fifo[2] = 0;
fdctrl->fifo[3] = 0;
@@ -1889,7 +1892,7 @@ static uint8_t command_to_handler[256];
static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
{
FDrive *cur_drv;
int pos;
uint32_t pos;
/* Reset mode */
if (!(fdctrl->dor & FD_DOR_nRESET)) {
@@ -1937,7 +1940,9 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
}
FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
fdctrl->fifo[fdctrl->data_pos++] = value;
pos = fdctrl->data_pos++;
pos %= FD_SECTOR_LEN;
fdctrl->fifo[pos] = value;
if (fdctrl->data_pos == fdctrl->data_len) {
/* We now have all parameters
* and will be able to treat the command

View File

@@ -222,6 +222,18 @@ static int hpet_pre_load(void *opaque)
return 0;
}
static bool hpet_validate_num_timers(void *opaque, int version_id)
{
HPETState *s = opaque;
if (s->num_timers < HPET_MIN_TIMERS) {
return false;
} else if (s->num_timers > HPET_MAX_TIMERS) {
return false;
}
return true;
}
static int hpet_post_load(void *opaque, int version_id)
{
HPETState *s = opaque;
@@ -290,6 +302,7 @@ static const VMStateDescription vmstate_hpet = {
VMSTATE_UINT64(isr, HPETState),
VMSTATE_UINT64(hpet_counter, HPETState),
VMSTATE_UINT8_V(num_timers, HPETState, 2),
VMSTATE_VALIDATE("num_timers in range", hpet_validate_num_timers),
VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
vmstate_hpet_timer, HPETTimer),
VMSTATE_END_OF_LIST()

View File

@@ -266,6 +266,12 @@ static int pit_dispatch_post_load(void *opaque, int version_id)
return 0;
}
static bool is_qemu_kvm(void *opaque, int version_id)
{
/* HACK: We ignore incoming migration from upstream qemu */
return version_id < 3;
}
static const VMStateDescription vmstate_pit_common = {
.name = "i8254",
.version_id = 3,
@@ -275,6 +281,7 @@ static const VMStateDescription vmstate_pit_common = {
.pre_save = pit_dispatch_pre_save,
.post_load = pit_dispatch_post_load,
.fields = (VMStateField[]) {
VMSTATE_UNUSED_TEST(is_qemu_kvm, 4),
VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3),
VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2,
vmstate_pit_channel, PITChannelState),

View File

@@ -1270,7 +1270,7 @@ const VMStateDescription vmstate_ahci = {
VMSTATE_UINT32(control_regs.impl, AHCIState),
VMSTATE_UINT32(control_regs.version, AHCIState),
VMSTATE_UINT32(idp_index, AHCIState),
VMSTATE_INT32(ports, AHCIState),
VMSTATE_INT32_EQUAL(ports, AHCIState),
VMSTATE_END_OF_LIST()
},
};

View File

@@ -1603,7 +1603,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
case 2: /* extended self test */
s->smart_selftest_count++;
if(s->smart_selftest_count > 21)
s->smart_selftest_count = 0;
s->smart_selftest_count = 1;
n = 2 + (s->smart_selftest_count - 1) * 24;
s->smart_selftest_data[n] = s->sector;
s->smart_selftest_data[n+1] = 0x00; /* OK and finished */

View File

@@ -188,7 +188,7 @@ static int macio_newworld_initfn(PCIDevice *d)
sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]);
sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]);
sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]);
macio_ide_register_dma(&ns->ide[0], s->dbdma, 0x1a);
macio_ide_register_dma(&ns->ide[1], s->dbdma, 0x1a);
ret = qdev_init(DEVICE(&ns->ide[1]));
if (ret < 0) {
return ret;

View File

@@ -41,6 +41,7 @@
#include "pci/msi.h"
#include "qemu/bitops.h"
#include "ppc.h"
#include "qapi/qmp/qerror.h"
//#define DEBUG_OPENPIC
@@ -1418,7 +1419,7 @@ static void openpic_load_IRQ_queue(QEMUFile* f, IRQQueue *q)
static int openpic_load(QEMUFile* f, void *opaque, int version_id)
{
OpenPICState *opp = (OpenPICState *)opaque;
unsigned int i;
unsigned int i, nb_cpus;
if (version_id != 1) {
return -EINVAL;
@@ -1430,7 +1431,11 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
qemu_get_be32s(f, &opp->spve);
qemu_get_be32s(f, &opp->tfrr);
qemu_get_be32s(f, &opp->nb_cpus);
qemu_get_be32s(f, &nb_cpus);
if (opp->nb_cpus != nb_cpus) {
return -EINVAL;
}
assert(nb_cpus > 0 && nb_cpus <= MAX_CPU);
for (i = 0; i < opp->nb_cpus; i++) {
qemu_get_sbe32s(f, &opp->dst[i].ctpr);
@@ -1561,6 +1566,13 @@ static int openpic_init(SysBusDevice *dev)
{NULL}
};
if (opp->nb_cpus > MAX_CPU) {
error_set(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
TYPE_OPENPIC, "nb_cpus", (uint64_t)opp->nb_cpus,
(uint64_t)0, (uint64_t)MAX_CPU);
return;
}
memory_region_init(&opp->mem, "openpic", 0x40000);
switch (opp->model) {

View File

@@ -1121,7 +1121,7 @@ void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) {
pc_init_ne2k_isa(isa_bus, nd);
} else {
pci_nic_init_nofail(nd, "e1000", NULL);
pci_nic_init_nofail(nd, "rtl8139", NULL);
}
}
}

View File

@@ -453,7 +453,32 @@ static QEMUMachine pc_machine_v1_0 = {
};
#define PC_COMPAT_0_15 \
PC_COMPAT_1_0
PC_COMPAT_1_0,\
{\
.driver = "VGA",\
.property = "vgamem_mb",\
.value = stringify(16),\
},{\
.driver = "vmware-svga",\
.property = "vgamem_mb",\
.value = stringify(16),\
},{\
.driver = "qxl-vga",\
.property = "vgamem_mb",\
.value = stringify(16),\
},{\
.driver = "qxl",\
.property = "vgamem_mb",\
.value = stringify(16),\
},{\
.driver = "isa-cirrus-vga",\
.property = "vgamem_mb",\
.value = stringify(16),\
},{\
.driver = "cirrus-vga",\
.property = "vgamem_mb",\
.value = stringify(16),\
}
static QEMUMachine pc_machine_v0_15 = {
.name = "pc-0.15",

View File

@@ -441,7 +441,7 @@ const VMStateDescription vmstate_pci_device = {
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
VMSTATE_INT32_LE(version_id, PCIDevice),
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO(config, PCIDevice, 0,
vmstate_info_pci_config,
PCI_CONFIG_SPACE_SIZE),
@@ -458,7 +458,7 @@ const VMStateDescription vmstate_pcie_device = {
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
VMSTATE_INT32_LE(version_id, PCIDevice),
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO(config, PCIDevice, 0,
vmstate_info_pci_config,
PCIE_CONFIG_SPACE_SIZE),

View File

@@ -795,6 +795,13 @@ static const VMStateDescription vmstate_pcie_aer_err = {
}
};
static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
{
PCIEAERLog *s = opaque;
return s->log_num <= s->log_max;
}
const VMStateDescription vmstate_pcie_aer_log = {
.name = "PCIE_AER_ERROR_LOG",
.version_id = 1,
@@ -802,7 +809,8 @@ const VMStateDescription vmstate_pcie_aer_log = {
.minimum_version_id_old = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT16(log_num, PCIEAERLog),
VMSTATE_UINT16(log_max, PCIEAERLog),
VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
vmstate_pcie_aer_err, PCIEAERErr),
VMSTATE_END_OF_LIST()

View File

@@ -861,6 +861,8 @@ static void pcnet_init(PCNetState *s)
s->csr[0] |= 0x0101;
s->csr[0] &= ~0x0004; /* clear STOP bit */
qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
static void pcnet_start(PCNetState *s)
@@ -878,6 +880,8 @@ static void pcnet_start(PCNetState *s)
s->csr[0] &= ~0x0004; /* clear STOP bit */
s->csr[0] |= 0x0002;
pcnet_poll_timer(s);
qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
static void pcnet_stop(PCNetState *s)
@@ -1209,7 +1213,7 @@ static void pcnet_transmit(PCNetState *s)
hwaddr xmit_cxda = 0;
int count = CSR_XMTRL(s)-1;
int add_crc = 0;
int bcnt;
s->xmit_pos = -1;
if (!CSR_TXON(s)) {
@@ -1244,35 +1248,48 @@ static void pcnet_transmit(PCNetState *s)
s->xmit_pos = -1;
goto txdone;
}
if (!GET_FIELD(tmd.status, TMDS, ENP)) {
int bcnt = 4096 - GET_FIELD(tmd.length, TMDL, BCNT);
s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr),
s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s));
s->xmit_pos += bcnt;
} else if (s->xmit_pos >= 0) {
int bcnt = 4096 - GET_FIELD(tmd.length, TMDL, BCNT);
s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr),
s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s));
s->xmit_pos += bcnt;
#ifdef PCNET_DEBUG
printf("pcnet_transmit size=%d\n", s->xmit_pos);
#endif
if (CSR_LOOP(s)) {
if (BCR_SWSTYLE(s) == 1)
add_crc = !GET_FIELD(tmd.status, TMDS, NOFCS);
s->looptest = add_crc ? PCNET_LOOPTEST_CRC : PCNET_LOOPTEST_NOCRC;
pcnet_receive(qemu_get_queue(s->nic), s->buffer, s->xmit_pos);
s->looptest = 0;
} else
if (s->nic)
qemu_send_packet(qemu_get_queue(s->nic), s->buffer,
s->xmit_pos);
s->csr[0] &= ~0x0008; /* clear TDMD */
s->csr[4] |= 0x0004; /* set TXSTRT */
s->xmit_pos = -1;
if (s->xmit_pos < 0) {
goto txdone;
}
bcnt = 4096 - GET_FIELD(tmd.length, TMDL, BCNT);
/* if multi-tmd packet outsizes s->buffer then skip it silently.
Note: this is not what real hw does */
if (s->xmit_pos + bcnt > sizeof(s->buffer)) {
s->xmit_pos = -1;
goto txdone;
}
s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr),
s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s));
s->xmit_pos += bcnt;
if (!GET_FIELD(tmd.status, TMDS, ENP)) {
goto txdone;
}
#ifdef PCNET_DEBUG
printf("pcnet_transmit size=%d\n", s->xmit_pos);
#endif
if (CSR_LOOP(s)) {
if (BCR_SWSTYLE(s) == 1)
add_crc = !GET_FIELD(tmd.status, TMDS, NOFCS);
s->looptest = add_crc ? PCNET_LOOPTEST_CRC : PCNET_LOOPTEST_NOCRC;
pcnet_receive(qemu_get_queue(s->nic), s->buffer, s->xmit_pos);
s->looptest = 0;
} else {
if (s->nic) {
qemu_send_packet(qemu_get_queue(s->nic), s->buffer,
s->xmit_pos);
}
}
s->csr[0] &= ~0x0008; /* clear TDMD */
s->csr[4] |= 0x0004; /* set TXSTRT */
s->xmit_pos = -1;
txdone:
SET_FIELD(&tmd.status, TMDS, OWN, 0);
TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s)));

View File

@@ -236,11 +236,25 @@ static const MemoryRegionOps pl022_ops = {
.endianness = DEVICE_NATIVE_ENDIAN,
};
static int pl022_post_load(void *opaque, int version_id)
{
PL022State *s = opaque;
if (s->tx_fifo_head < 0 ||
s->tx_fifo_head >= ARRAY_SIZE(s->tx_fifo) ||
s->rx_fifo_head < 0 ||
s->rx_fifo_head >= ARRAY_SIZE(s->rx_fifo)) {
return -1;
}
return 0;
}
static const VMStateDescription vmstate_pl022 = {
.name = "pl022_ssp",
.version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.post_load = pl022_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT32(cr0, pl022_state),
VMSTATE_UINT32(cr1, pl022_state),

View File

@@ -735,7 +735,7 @@ static void pxa2xx_ssp_save(QEMUFile *f, void *opaque)
static int pxa2xx_ssp_load(QEMUFile *f, void *opaque, int version_id)
{
PXA2xxSSPState *s = (PXA2xxSSPState *) opaque;
int i;
int i, v;
s->enable = qemu_get_be32(f);
@@ -749,7 +749,11 @@ static int pxa2xx_ssp_load(QEMUFile *f, void *opaque, int version_id)
qemu_get_8s(f, &s->ssrsa);
qemu_get_8s(f, &s->ssacd);
s->rx_level = qemu_get_byte(f);
v = qemu_get_byte(f);
if (v < 0 || v > ARRAY_SIZE(s->rx_fifo)) {
return -EINVAL;
}
s->rx_level = v;
s->rx_start = 0;
for (i = 0; i < s->rx_level; i ++)
s->rx_fifo[i] = qemu_get_byte(f);

View File

@@ -96,7 +96,7 @@ typedef struct DeviceClass {
/* Private to qdev / bus. */
qdev_initfn init; /* TODO remove, once users are converted to realize */
qdev_event unplug;
qdev_event exit;
qdev_event exit; /* TODO remove, once users are converted to unrealize */
const char *bus_type;
} DeviceClass;


@@ -40,9 +40,9 @@ static const QDevAlias qdev_alias_table[] = {
{ "virtio-serial-pci", "virtio-serial", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
{ "virtio-balloon-pci", "virtio-balloon",
QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
{ "virtio-blk-s390", "virtio-blk", QEMU_ARCH_S390X },
{ "virtio-net-s390", "virtio-net", QEMU_ARCH_S390X },
{ "virtio-serial-s390", "virtio-serial", QEMU_ARCH_S390X },
{ "virtio-blk-ccw", "virtio-blk", QEMU_ARCH_S390X },
{ "virtio-net-ccw", "virtio-net", QEMU_ARCH_S390X },
{ "virtio-serial-ccw", "virtio-serial", QEMU_ARCH_S390X },
{ "lsi53c895a", "lsi" },
{ "ich9-ahci", "ahci" },
{ "kvm-pci-assign", "pci-assign" },


@@ -143,6 +143,7 @@ PropertyInfo qdev_prop_uint8 = {
static int parse_hex8(DeviceState *dev, Property *prop, const char *str)
{
unsigned long val;
uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
char *end;
@@ -150,11 +151,18 @@ static int parse_hex8(DeviceState *dev, Property *prop, const char *str)
return -EINVAL;
}
*ptr = strtoul(str, &end, 16);
errno = 0;
val = strtoul(str, &end, 16);
if (errno) {
return -errno;
}
if (val > UINT8_MAX) {
return -ERANGE;
}
if ((*end != '\0') || (end == str)) {
return -EINVAL;
}
*ptr = val;
return 0;
}
@@ -274,6 +282,7 @@ PropertyInfo qdev_prop_int32 = {
static int parse_hex32(DeviceState *dev, Property *prop, const char *str)
{
unsigned long val;
uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
char *end;
@@ -281,11 +290,18 @@ static int parse_hex32(DeviceState *dev, Property *prop, const char *str)
return -EINVAL;
}
*ptr = strtoul(str, &end, 16);
errno = 0;
val = strtoul(str, &end, 16);
if (errno) {
return -errno;
}
if (val > UINT32_MAX) {
return -ERANGE;
}
if ((*end != '\0') || (end == str)) {
return -EINVAL;
}
*ptr = val;
return 0;
}
@@ -341,6 +357,7 @@ PropertyInfo qdev_prop_uint64 = {
static int parse_hex64(DeviceState *dev, Property *prop, const char *str)
{
unsigned long long val;
uint64_t *ptr = qdev_get_prop_ptr(dev, prop);
char *end;
@@ -348,11 +365,18 @@ static int parse_hex64(DeviceState *dev, Property *prop, const char *str)
return -EINVAL;
}
*ptr = strtoull(str, &end, 16);
errno = 0;
val = strtoull(str, &end, 16);
if (errno) {
return -errno;
}
if (val > UINT64_MAX) {
return -ERANGE;
}
if ((*end != '\0') || (end == str)) {
return -EINVAL;
}
*ptr = val;
return 0;
}
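
The parse_hex8/32/64 changes all follow one recipe: clear errno, parse into a wider temporary, then reject overflow, out-of-range values and trailing junk before storing the result. A self-contained version of that recipe for an 8-bit value (the function name is illustrative, not the qdev API):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Parse a hex string into a uint8_t; return 0 on success, negative errno otherwise. */
    static int parse_hex_u8(const char *str, uint8_t *out)
    {
        char *end;
        unsigned long val;

        errno = 0;
        val = strtoul(str, &end, 16);
        if (errno) {
            return -errno;                  /* e.g. ERANGE reported by strtoul itself */
        }
        if (val > UINT8_MAX) {
            return -ERANGE;                 /* fits in unsigned long but not in the target */
        }
        if (end == str || *end != '\0') {
            return -EINVAL;                 /* empty input or trailing garbage */
        }
        *out = (uint8_t)val;
        return 0;
    }

    int main(void)
    {
        uint8_t v;
        int rc = parse_hex_u8("7f", &v);

        printf("\"7f\"  -> %d (v=%u)\n", rc, (unsigned)v);
        printf("\"1ff\" -> %d\n", parse_hex_u8("1ff", &v));   /* -ERANGE */
        printf("\"7fx\" -> %d\n", parse_hex_u8("7fx", &v));   /* -EINVAL */
        return 0;
    }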


@@ -180,6 +180,19 @@ static void device_realize(DeviceState *dev, Error **err)
}
}
static void device_unrealize(DeviceState *dev, Error **errp)
{
DeviceClass *dc = DEVICE_GET_CLASS(dev);
if (dc->exit) {
int rc = dc->exit(dev);
if (rc < 0) {
error_setg(errp, "Device exit failed.");
return;
}
}
}
void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version)
{
@@ -692,6 +705,9 @@ static void device_set_realized(Object *obj, bool value, Error **err)
device_reset(dev);
}
} else if (!value && dev->realized) {
if (qdev_get_vmsd(dev)) {
vmstate_unregister(dev, qdev_get_vmsd(dev), dev);
}
if (dc->unrealize) {
dc->unrealize(dev, &local_err);
}
@@ -758,7 +774,6 @@ static void device_class_base_init(ObjectClass *class, void *data)
static void device_unparent(Object *obj)
{
DeviceState *dev = DEVICE(obj);
DeviceClass *dc = DEVICE_GET_CLASS(dev);
BusState *bus;
while (dev->num_child_bus) {
@@ -766,12 +781,7 @@ static void device_unparent(Object *obj)
qbus_free(bus);
}
if (dev->realized) {
if (qdev_get_vmsd(dev)) {
vmstate_unregister(dev, qdev_get_vmsd(dev), dev);
}
if (dc->exit) {
dc->exit(dev);
}
object_property_set_bool(obj, false, "realized", NULL);
}
if (dev->parent_bus) {
bus_remove_child(dev->parent_bus, dev);
@@ -786,6 +796,7 @@ static void device_class_init(ObjectClass *class, void *data)
class->unparent = device_unparent;
dc->realize = device_realize;
dc->unrealize = device_unrealize;
}
void device_reset(DeviceState *dev)


@@ -118,7 +118,8 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
qxl->guest_primary.surface.height,
qxl->guest_primary.bits_pp,
qxl->guest_primary.abs_stride,
qxl->guest_primary.data);
qxl->guest_primary.data,
false);
} else {
qemu_resize_displaysurface(vga->ds,
qxl->guest_primary.surface.width,


@@ -1075,8 +1075,8 @@ static void qxl_enter_vga_mode(PCIQXLDevice *d)
trace_qxl_enter_vga_mode(d->id);
qemu_spice_create_host_primary(&d->ssd);
d->mode = QXL_MODE_VGA;
dpy_gfx_resize(d->ssd.ds);
vga_dirty_log_start(&d->vga);
vga_hw_update();
}
static void qxl_exit_vga_mode(PCIQXLDevice *d)


@@ -2575,6 +2575,9 @@ static void rtl8139_RxBufPtr_write(RTL8139State *s, uint32_t val)
/* this value is off by 16 */
s->RxBufPtr = MOD2(val + 0x10, s->RxBufferSize);
/* more buffer space may be available so try to receive */
qemu_flush_queued_packets(qemu_get_queue(s->nic));
DPRINTF(" CAPR write: rx buffer length %d head 0x%04x read 0x%04x\n",
s->RxBufferSize, s->RxBufAddr, s->RxBufPtr);
}


@@ -777,7 +777,7 @@ int css_do_tsch(SubchDev *sch, IRB *target_irb)
(p->chars & PMCW_CHARS_MASK_CSENSE)) {
irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL;
memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data));
irb.esw[1] = 0x02000000 | (sizeof(sch->sense_data) << 8);
irb.esw[1] = 0x01000000 | (sizeof(sch->sense_data) << 8);
}
}
/* Store the irb to the guest. */


@@ -16,6 +16,8 @@
#include "elf.h"
#include "hw/loader.h"
#include "hw/sysbus.h"
#include "hw/s390x/virtio-ccw.h"
#include "hw/s390x/css.h"
#define KERN_IMAGE_START 0x010000UL
#define KERN_PARM_AREA 0x010480UL
@@ -23,7 +25,6 @@
#define INITRD_PARM_START 0x010408UL
#define INITRD_PARM_SIZE 0x010410UL
#define PARMFILE_START 0x001000UL
#define ZIPL_FILENAME "s390-zipl.rom"
#define ZIPL_IMAGE_START 0x009000UL
#define IPL_PSW_MASK (PSW_MASK_32 | PSW_MASK_64)
@@ -48,24 +49,16 @@ typedef struct S390IPLClass {
typedef struct S390IPLState {
/*< private >*/
SysBusDevice parent_obj;
/*< public >*/
uint64_t start_addr;
/*< public >*/
char *kernel;
char *initrd;
char *cmdline;
char *firmware;
} S390IPLState;
static void s390_ipl_cpu(uint64_t pswaddr)
{
S390CPU *cpu = S390_CPU(qemu_get_cpu(0));
CPUS390XState *env = &cpu->env;
env->psw.addr = pswaddr;
env->psw.mask = IPL_PSW_MASK;
s390_add_running_cpu(cpu);
}
static int s390_ipl_init(SysBusDevice *dev)
{
S390IPLState *ipl = S390_IPL(dev);
@@ -77,20 +70,29 @@ static int s390_ipl_init(SysBusDevice *dev)
/* Load zipl bootloader */
if (bios_name == NULL) {
bios_name = ZIPL_FILENAME;
bios_name = ipl->firmware;
}
bios_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
bios_size = load_image_targphys(bios_filename, ZIPL_IMAGE_START, 4096);
if (bios_filename == NULL) {
hw_error("could not find stage1 bootloader\n");
}
bios_size = load_elf(bios_filename, NULL, NULL, &ipl->start_addr, NULL,
NULL, 1, ELF_MACHINE, 0);
if (bios_size == -1UL) {
bios_size = load_image_targphys(bios_filename, ZIPL_IMAGE_START,
4096);
ipl->start_addr = ZIPL_IMAGE_START;
if (bios_size > 4096) {
hw_error("stage1 bootloader is > 4k\n");
}
}
g_free(bios_filename);
if ((long)bios_size < 0) {
hw_error("could not load bootloader '%s'\n", bios_name);
}
if (bios_size > 4096) {
hw_error("stage1 bootloader is > 4k\n");
}
return 0;
} else {
kernel_size = load_elf(ipl->kernel, NULL, NULL, NULL, NULL,
@@ -104,6 +106,13 @@ static int s390_ipl_init(SysBusDevice *dev)
}
/* we have to overwrite values in the kernel image, which are "rom" */
strcpy(rom_ptr(KERN_PARM_AREA), ipl->cmdline);
/*
* we can not rely on the ELF entry point, since up to 3.2 this
* value was 0x800 (the SALIPL loader) and it wont work. For
* all (Linux) cases 0x10000 (KERN_IMAGE_START) should be fine.
*/
ipl->start_addr = KERN_IMAGE_START;
}
if (ipl->initrd) {
ram_addr_t initrd_offset, initrd_size;
@@ -131,23 +140,37 @@ static Property s390_ipl_properties[] = {
DEFINE_PROP_STRING("kernel", S390IPLState, kernel),
DEFINE_PROP_STRING("initrd", S390IPLState, initrd),
DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline),
DEFINE_PROP_STRING("firmware", S390IPLState, firmware),
DEFINE_PROP_END_OF_LIST(),
};
static void s390_ipl_reset(DeviceState *dev)
{
S390IPLState *ipl = S390_IPL(dev);
S390CPU *cpu = S390_CPU(qemu_get_cpu(0));
CPUS390XState *env = &cpu->env;
if (ipl->kernel) {
/*
* we can not rely on the ELF entry point, since up to 3.2 this
* value was 0x800 (the SALIPL loader) and it wont work. For
* all (Linux) cases 0x10000 (KERN_IMAGE_START) should be fine.
*/
return s390_ipl_cpu(KERN_IMAGE_START);
} else {
return s390_ipl_cpu(ZIPL_IMAGE_START);
env->psw.addr = ipl->start_addr;
env->psw.mask = IPL_PSW_MASK;
if (!ipl->kernel) {
/* Tell firmware, if there is a preferred boot device */
env->regs[7] = -1;
DeviceState *dev_st = get_boot_device(0);
if (dev_st) {
VirtioCcwDevice *ccw_dev = (VirtioCcwDevice *) object_dynamic_cast(
OBJECT(dev_st),
"virtio-blk-ccw");
if (ccw_dev) {
env->regs[7] = ccw_dev->sch->cssid << 24 |
ccw_dev->sch->ssid << 16 |
ccw_dev->sch->devno;
}
}
}
s390_add_running_cpu(cpu);
}
static void s390_ipl_class_init(ObjectClass *klass, void *data)
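
The new reset hook hands the firmware its preferred boot device as a packed subchannel identifier in register 7, with -1 still meaning "no preference". A small sketch of that packing, using the field positions visible in the hunk (the example values are made up):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pack css id, subchannel-set id and device number the way the reset hook does. */
    static uint32_t pack_boot_device(uint8_t cssid, uint8_t ssid, uint16_t devno)
    {
        return ((uint32_t)cssid << 24) | ((uint32_t)ssid << 16) | devno;
    }

    int main(void)
    {
        /* e.g. a virtio-blk-ccw device 0.0.0001 on the virtual css 0xfe */
        printf("regs[7] = 0x%08" PRIx32 "\n", pack_boot_device(0xfe, 0, 0x0001));
        return 0;
    }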


@@ -402,6 +402,7 @@ static const VirtIOBindings virtio_s390_bindings = {
static Property s390_virtio_net_properties[] = {
DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
DEFINE_VIRTIO_NET_FEATURES(VirtIOS390Device, host_features),
DEFINE_PROP_UINT32("x-txtimer", VirtIOS390Device,
net.txtimer, TX_TIMER_INTERVAL),
DEFINE_PROP_INT32("x-txburst", VirtIOS390Device,


@@ -31,6 +31,9 @@ static int virtio_ccw_hcall_notify(const uint64_t *args)
if (!sch || !css_subch_visible(sch)) {
return -EINVAL;
}
if (queue >= VIRTIO_PCI_QUEUE_MAX) {
return -EINVAL;
}
virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
return 0;
@@ -80,7 +83,7 @@ static void ccw_init(QEMUMachineInitArgs *args)
css_bus = virtual_css_bus_init();
s390_sclp_init();
s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
args->initrd_filename);
args->initrd_filename, "s390-ccw.img");
/* register hypercalls */
virtio_ccw_register_hcalls();
@@ -123,6 +126,7 @@ static QEMUMachine ccw_machine = {
.no_sdcard = 1,
.use_sclp = 1,
.max_cpus = 255,
.is_default = 1,
DEFAULT_MACHINE_OPTIONS,
};


@@ -49,8 +49,11 @@
#endif
#define MAX_BLK_DEVS 10
#define ZIPL_FILENAME "s390-zipl.rom"
#if 0
static VirtIOS390Bus *s390_bus;
#endif
static S390CPU **ipi_states;
S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
@@ -62,6 +65,7 @@ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
return ipi_states[cpu_addr];
}
#if 0
static int s390_virtio_hcall_notify(const uint64_t *args)
{
uint64_t mem = args[0];
@@ -76,6 +80,11 @@ static int s390_virtio_hcall_notify(const uint64_t *args)
}
} else {
/* Early printk */
uint8_t *p = (uint8_t *)qemu_get_ram_ptr(mem);
if (s390_bus) {
VirtIOS390Device *dev = s390_virtio_bus_console(s390_bus);
virtio_console_print_early(dev->vdev, p);
}
}
return r;
}
@@ -121,6 +130,7 @@ static void s390_virtio_register_hcalls(void)
s390_register_virtio_hypercall(KVM_S390_VIRTIO_SET_STATUS,
s390_virtio_hcall_set_status);
}
#endif
/*
* The number of running CPUs. On s390 a shutdown is the state of all CPUs
@@ -156,7 +166,8 @@ unsigned s390_del_running_cpu(S390CPU *cpu)
void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename)
const char *initrd_filename,
const char *firmware)
{
DeviceState *dev;
@@ -168,6 +179,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
qdev_prop_set_string(dev, "initrd", initrd_filename);
}
qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
qdev_prop_set_string(dev, "firmware", firmware);
qdev_init_nofail(dev);
}
@@ -217,6 +229,7 @@ void s390_create_virtio_net(BusState *bus, const char *name)
}
}
#if 0
/* PC hardware initialisation */
static void s390_init(QEMUMachineInitArgs *args)
{
@@ -243,7 +256,7 @@ static void s390_init(QEMUMachineInitArgs *args)
s390_bus = s390_virtio_bus_init(&my_ram_size);
s390_sclp_init();
s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
args->initrd_filename);
args->initrd_filename, ZIPL_FILENAME);
/* register hypercalls */
s390_virtio_register_hcalls();
@@ -285,7 +298,6 @@ static QEMUMachine s390_machine = {
.no_sdcard = 1,
.use_virtcon = 1,
.max_cpus = 255,
.is_default = 1,
DEFAULT_MACHINE_OPTIONS,
};
@@ -295,3 +307,4 @@ static void s390_machine_init(void)
}
machine_init(s390_machine_init);
#endif


@@ -23,6 +23,7 @@ void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys);
void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename);
const char *initrd_filename,
const char *firmware);
void s390_create_virtio_net(BusState *bus, const char *name);
#endif


@@ -332,10 +332,10 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
ret = -EINVAL;
break;
}
indicators = ldq_phys(ccw.cda);
if (!indicators) {
if (!ccw.cda) {
ret = -EFAULT;
} else {
indicators = ldq_phys(ccw.cda);
dev->indicators = indicators;
sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
ret = 0;
@@ -352,10 +352,10 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
ret = -EINVAL;
break;
}
indicators = ldq_phys(ccw.cda);
if (!indicators) {
if (!ccw.cda) {
ret = -EFAULT;
} else {
indicators = ldq_phys(ccw.cda);
dev->indicators2 = indicators;
sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
ret = 0;
@@ -643,6 +643,30 @@ static int virtio_ccw_scsi_exit(VirtioCcwDevice *dev)
return virtio_ccw_exit(dev);
}
static int virtio_ccw_rng_init(VirtioCcwDevice *dev)
{
VirtIODevice *vdev;
if (dev->rng.rng == NULL) {
dev->rng.default_backend = RNG_RANDOM(object_new(TYPE_RNG_RANDOM));
object_property_add_child(OBJECT(dev), "default-backend",
OBJECT(dev->rng.default_backend), NULL);
object_property_set_link(OBJECT(dev), OBJECT(dev->rng.default_backend),
"rng", NULL);
}
vdev = virtio_rng_init((DeviceState *)dev, &dev->rng);
if (!vdev) {
return -1;
}
return virtio_ccw_device_init(dev, vdev);
}
static int virtio_ccw_rng_exit(VirtioCcwDevice *dev)
{
virtio_rng_exit(dev->vdev);
return virtio_ccw_exit(dev);
}
/* DeviceState to VirtioCcwDevice. Note: used on datapath,
* be careful and test performance if you change this.
*/
@@ -662,10 +686,16 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
}
if (vector < VIRTIO_PCI_QUEUE_MAX) {
if (!dev->indicators) {
return;
}
indicators = ldq_phys(dev->indicators);
indicators |= 1ULL << vector;
stq_phys(dev->indicators, indicators);
} else {
if (!dev->indicators2) {
return;
}
vector = 0;
indicators = ldq_phys(dev->indicators2);
indicators |= 1ULL << vector;
@@ -690,6 +720,8 @@ static void virtio_ccw_reset(DeviceState *d)
virtio_reset(dev->vdev);
css_reset_sch(dev->sch);
dev->indicators = 0;
dev->indicators2 = 0;
}
/**************** Virtio-ccw Bus Device Descriptions *******************/
@@ -832,6 +864,41 @@ static const TypeInfo virtio_ccw_scsi = {
.class_init = virtio_ccw_scsi_class_init,
};
static void virtio_ccw_rng_initfn(Object *obj)
{
VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(obj);
object_property_add_link(obj, "rng", TYPE_RNG_BACKEND,
(Object **)&dev->rng.rng, NULL);
}
static Property virtio_ccw_rng_properties[] = {
DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]),
DEFINE_PROP_UINT64("max-bytes", VirtioCcwDevice, rng.max_bytes, INT64_MAX),
DEFINE_PROP_UINT32("period", VirtioCcwDevice, rng.period_ms, 1 << 16),
DEFINE_PROP_END_OF_LIST(),
};
static void virtio_ccw_rng_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
k->init = virtio_ccw_rng_init;
k->exit = virtio_ccw_rng_exit;
dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_rng_properties;
}
static const TypeInfo virtio_ccw_rng = {
.name = "virtio-rng-ccw",
.parent = TYPE_VIRTIO_CCW_DEVICE,
.instance_size = sizeof(VirtioCcwDevice),
.instance_init = virtio_ccw_rng_initfn,
.class_init = virtio_ccw_rng_class_init,
};
static int virtio_ccw_busdev_init(DeviceState *dev)
{
VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
@@ -955,6 +1022,7 @@ static void virtio_ccw_register(void)
type_register_static(&virtio_ccw_net);
type_register_static(&virtio_ccw_balloon);
type_register_static(&virtio_ccw_scsi);
type_register_static(&virtio_ccw_rng);
type_register_static(&virtual_css_bridge_info);
}
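
Several of the virtio-ccw hunks share one guard: a guest-supplied address of zero is rejected before it is ever passed to ldq_phys(), whether it is the CCW data address or a not-yet-registered indicator location. A compact sketch of that guard, with a fake in-process accessor standing in for ldq_phys() (everything here is invented except the null-check-first shape):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t guest_mem[16];                  /* pretend guest memory */

    static uint64_t ldq_fake(uint64_t addr)         /* stand-in for ldq_phys() */
    {
        return guest_mem[addr % 16];
    }

    struct ccw_dev {
        uint64_t indicators;                        /* guest-registered address, 0 = none */
    };

    /* Register the indicator location only if the CCW actually carries an address. */
    static int set_indicators(struct ccw_dev *dev, uint64_t cda)
    {
        if (!cda) {
            return -EFAULT;                         /* reject a null data address up front */
        }
        dev->indicators = ldq_fake(cda);
        return 0;
    }

    int main(void)
    {
        struct ccw_dev dev = { 0 };
        int rc;

        guest_mem[3] = 0x1000;
        printf("cda=0: %d\n", set_indicators(&dev, 0));
        rc = set_indicators(&dev, 3);
        printf("cda=3: %d (indicators=0x%llx)\n", rc, (unsigned long long)dev.indicators);
        return 0;
    }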


@@ -16,6 +16,7 @@
#include <hw/virtio-net.h>
#include <hw/virtio-serial.h>
#include <hw/virtio-scsi.h>
#include <hw/virtio-rng.h>
#include <hw/virtio-bus.h>
#define VIRTUAL_CSSID 0xfe
@@ -77,6 +78,7 @@ struct VirtioCcwDevice {
virtio_serial_conf serial;
virtio_net_conf net;
VirtIOSCSIConf scsi;
VirtIORNGConf rng;
VirtioBusState bus;
/* Guest provided values: */
hwaddr indicators;


@@ -11,6 +11,8 @@ static char *scsibus_get_dev_path(DeviceState *dev);
static char *scsibus_get_fw_dev_path(DeviceState *dev);
static int scsi_req_parse(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf);
static void scsi_req_dequeue(SCSIRequest *req);
static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len);
static void scsi_target_free_buf(SCSIRequest *req);
static Property scsi_props[] = {
DEFINE_PROP_UINT32("channel", SCSIDevice, channel, 0),
@@ -304,7 +306,8 @@ typedef struct SCSITargetReq SCSITargetReq;
struct SCSITargetReq {
SCSIRequest req;
int len;
uint8_t buf[2056];
uint8_t *buf;
int buf_len;
};
static void store_lun(uint8_t *outbuf, int lun)
@@ -348,14 +351,12 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
if (!found_lun0) {
n += 8;
}
len = MIN(n + 8, r->req.cmd.xfer & ~7);
if (len > sizeof(r->buf)) {
/* TODO: > 256 LUNs? */
return false;
}
scsi_target_alloc_buf(&r->req, n + 8);
len = MIN(n + 8, r->req.cmd.xfer & ~7);
memset(r->buf, 0, len);
stl_be_p(&r->buf, n);
stl_be_p(&r->buf[0], n);
i = found_lun0 ? 8 : 16;
QTAILQ_FOREACH(kid, &r->req.bus->qbus.children, sibling) {
DeviceState *qdev = kid->child;
@@ -374,6 +375,9 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
static bool scsi_target_emulate_inquiry(SCSITargetReq *r)
{
assert(r->req.dev->lun != r->req.lun);
scsi_target_alloc_buf(&r->req, SCSI_INQUIRY_LEN);
if (r->req.cmd.buf[1] & 0x2) {
/* Command support data - optional, not implemented */
return false;
@@ -398,7 +402,7 @@ static bool scsi_target_emulate_inquiry(SCSITargetReq *r)
return false;
}
/* done with EVPD */
assert(r->len < sizeof(r->buf));
assert(r->len < r->buf_len);
r->len = MIN(r->req.cmd.xfer, r->len);
return true;
}
@@ -409,7 +413,7 @@ static bool scsi_target_emulate_inquiry(SCSITargetReq *r)
}
/* PAGE CODE == 0 */
r->len = MIN(r->req.cmd.xfer, 36);
r->len = MIN(r->req.cmd.xfer, SCSI_INQUIRY_LEN);
memset(r->buf, 0, r->len);
if (r->req.lun != 0) {
r->buf[0] = TYPE_NO_LUN;
@@ -442,8 +446,9 @@ static int32_t scsi_target_send_command(SCSIRequest *req, uint8_t *buf)
}
break;
case REQUEST_SENSE:
scsi_target_alloc_buf(&r->req, SCSI_SENSE_LEN);
r->len = scsi_device_get_sense(r->req.dev, r->buf,
MIN(req->cmd.xfer, sizeof r->buf),
MIN(req->cmd.xfer, r->buf_len),
(req->cmd.buf[1] & 1) == 0);
if (r->req.dev->sense_is_ua) {
scsi_device_unit_attention_reported(req->dev);
@@ -488,11 +493,29 @@ static uint8_t *scsi_target_get_buf(SCSIRequest *req)
return r->buf;
}
static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len)
{
SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req);
r->buf = g_malloc(len);
r->buf_len = len;
return r->buf;
}
static void scsi_target_free_buf(SCSIRequest *req)
{
SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req);
g_free(r->buf);
}
static const struct SCSIReqOps reqops_target_command = {
.size = sizeof(SCSITargetReq),
.send_command = scsi_target_send_command,
.read_data = scsi_target_read_data,
.get_buf = scsi_target_get_buf,
.free_req = scsi_target_free_buf,
};
@@ -1348,7 +1371,7 @@ int scsi_build_sense(uint8_t *in_buf, int in_len,
buf[7] = 10;
buf[12] = sense.asc;
buf[13] = sense.ascq;
return MIN(len, 18);
return MIN(len, SCSI_SENSE_LEN);
} else {
/* Return descriptor format sense buffer */
buf[0] = 0x72;
@@ -1508,6 +1531,10 @@ void scsi_req_unref(SCSIRequest *req)
will start the next chunk or complete the command. */
void scsi_req_continue(SCSIRequest *req)
{
if (req->io_canceled) {
trace_scsi_req_continue_canceled(req->dev->id, req->lun, req->tag);
return;
}
trace_scsi_req_continue(req->dev->id, req->lun, req->tag);
if (req->cmd.mode == SCSI_XFER_TO_DEV) {
req->ops->write_data(req);
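
The SCSITargetReq change drops the fixed 2056-byte buf[] in favour of a buffer allocated per command, tracked through buf_len and released by a free_req hook. A stripped-down sketch of that allocate/track/free pattern (plain malloc instead of g_malloc, invented request type):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct target_req {
        unsigned char *buf;                 /* sized per command instead of a fixed array */
        size_t buf_len;
        int len;                            /* bytes actually filled in */
    };

    static unsigned char *target_alloc_buf(struct target_req *r, size_t len)
    {
        r->buf = malloc(len);
        r->buf_len = r->buf ? len : 0;
        return r->buf;
    }

    static void target_free_buf(struct target_req *r)
    {
        free(r->buf);
        r->buf = NULL;
        r->buf_len = 0;
    }

    int main(void)
    {
        struct target_req r = { 0 };

        if (!target_alloc_buf(&r, 36)) {    /* e.g. a 36-byte standard INQUIRY payload */
            return 1;
        }
        memset(r.buf, 0, r.buf_len);
        r.len = (int)r.buf_len;
        printf("allocated %zu bytes, len=%d\n", r.buf_len, r.len);
        target_free_buf(&r);
        return 0;
    }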


@@ -176,6 +176,9 @@ static void scsi_aio_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
bdrv_acct_done(s->qdev.conf.bs, &r->acct);
if (r->req.io_canceled) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret)) {
@@ -221,6 +224,10 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
if (r->req.io_canceled) {
goto done;
}
if (scsi_is_cmd_fua(&r->req.cmd)) {
bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH);
r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r);
@@ -228,6 +235,8 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
}
scsi_req_complete(&r->req, GOOD);
done:
if (!r->req.io_canceled) {
scsi_req_unref(&r->req);
}
@@ -241,6 +250,9 @@ static void scsi_dma_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
bdrv_acct_done(s->qdev.conf.bs, &r->acct);
if (r->req.io_canceled) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret)) {
@@ -272,6 +284,9 @@ static void scsi_read_complete(void * opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
bdrv_acct_done(s->qdev.conf.bs, &r->acct);
if (r->req.io_canceled) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret)) {
@@ -303,6 +318,9 @@ static void scsi_do_read(void *opaque, int ret)
r->req.aiocb = NULL;
bdrv_acct_done(s->qdev.conf.bs, &r->acct);
}
if (r->req.io_canceled) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret)) {
@@ -310,10 +328,6 @@ static void scsi_do_read(void *opaque, int ret)
}
}
if (r->req.io_canceled) {
return;
}
/* The request is used as the AIO opaque value, so add a ref. */
scsi_req_ref(&r->req);
@@ -421,6 +435,9 @@ static void scsi_write_complete(void * opaque, int ret)
r->req.aiocb = NULL;
bdrv_acct_done(s->qdev.conf.bs, &r->acct);
}
if (r->req.io_canceled) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret)) {
@@ -1476,13 +1493,17 @@ static void scsi_unmap_complete(void *opaque, int ret)
uint32_t nb_sectors;
r->req.aiocb = NULL;
if (r->req.io_canceled) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret)) {
goto done;
}
}
if (data->count > 0 && !r->req.io_canceled) {
if (data->count > 0) {
sector_num = ldq_be_p(&data->inbuf[0]);
nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
if (!check_lba_range(s, sector_num, nb_sectors)) {
@@ -1499,10 +1520,9 @@ static void scsi_unmap_complete(void *opaque, int ret)
return;
}
scsi_req_complete(&r->req, GOOD);
done:
if (data->count == 0) {
scsi_req_complete(&r->req, GOOD);
}
if (!r->req.io_canceled) {
scsi_req_unref(&r->req);
}
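
Every completion callback in this hunk gains the same prologue: if the request was cancelled while the AIO was in flight, skip straight to the shared cleanup path instead of completing the request. A minimal sketch of that shape (invented request type, no real AIO):

    #include <stdbool.h>
    #include <stdio.h>

    struct disk_req {
        bool io_canceled;
        int refcount;
    };

    static void req_unref(struct disk_req *r)
    {
        if (--r->refcount == 0) {
            printf("request freed\n");
        }
    }

    /* Shape of the scsi_*_complete() callbacks after the patch: cancellation is
     * checked first and the cleanup path is shared via the done label. */
    static void io_complete(struct disk_req *r, int ret)
    {
        if (r->io_canceled) {
            goto done;                      /* never complete a cancelled request */
        }
        if (ret < 0) {
            printf("completing with error %d\n", ret);
            goto done;
        }
        printf("completing successfully\n");
    done:
        if (!r->io_canceled) {
            req_unref(r);                   /* on cancellation the cancel path owns the ref */
        }
    }

    int main(void)
    {
        struct disk_req ok = { .io_canceled = false, .refcount = 1 };
        struct disk_req cancelled = { .io_canceled = true, .refcount = 1 };

        io_complete(&ok, 0);
        io_complete(&cancelled, 0);         /* prints nothing */
        return 0;
    }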


@@ -9,6 +9,8 @@
#define MAX_SCSI_DEVS 255
#define SCSI_CMD_BUF_SIZE 16
#define SCSI_SENSE_LEN 18
#define SCSI_INQUIRY_LEN 36
typedef struct SCSIBus SCSIBus;
typedef struct SCSIBusInfo SCSIBusInfo;


@@ -260,6 +260,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
_FDT((fdt_begin_node(fdt, "")));
_FDT((fdt_property_string(fdt, "device_type", "chrp")));
_FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
_FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
_FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
_FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));


@@ -175,11 +175,19 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
return size;
}
static void spapr_vlan_cleanup(NetClientState *nc)
{
VIOsPAPRVLANDevice *dev = qemu_get_nic_opaque(nc);
dev->nic = NULL;
}
static NetClientInfo net_spapr_vlan_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.size = sizeof(NICState),
.can_receive = spapr_vlan_can_receive,
.receive = spapr_vlan_receive,
.cleanup = spapr_vlan_cleanup,
};
static void spapr_vlan_reset(VIOsPAPRDevice *sdev)


@@ -311,18 +311,42 @@ static int ssd0323_load(QEMUFile *f, void *opaque, int version_id)
return -EINVAL;
s->cmd_len = qemu_get_be32(f);
if (s->cmd_len < 0 || s->cmd_len > ARRAY_SIZE(s->cmd_data)) {
return -EINVAL;
}
s->cmd = qemu_get_be32(f);
for (i = 0; i < 8; i++)
s->cmd_data[i] = qemu_get_be32(f);
s->row = qemu_get_be32(f);
if (s->row < 0 || s->row >= 80) {
return -EINVAL;
}
s->row_start = qemu_get_be32(f);
if (s->row_start < 0 || s->row_start >= 80) {
return -EINVAL;
}
s->row_end = qemu_get_be32(f);
if (s->row_end < 0 || s->row_end >= 80) {
return -EINVAL;
}
s->col = qemu_get_be32(f);
if (s->col < 0 || s->col >= 64) {
return -EINVAL;
}
s->col_start = qemu_get_be32(f);
if (s->col_start < 0 || s->col_start >= 64) {
return -EINVAL;
}
s->col_end = qemu_get_be32(f);
if (s->col_end < 0 || s->col_end >= 64) {
return -EINVAL;
}
s->redraw = qemu_get_be32(f);
s->remap = qemu_get_be32(f);
s->mode = qemu_get_be32(f);
if (s->mode != SSD0323_CMD && s->mode != SSD0323_DATA) {
return -EINVAL;
}
qemu_get_buffer(f, s->framebuffer, sizeof(s->framebuffer));
ss->cs = qemu_get_be32(f);


@@ -230,8 +230,17 @@ static int ssi_sd_load(QEMUFile *f, void *opaque, int version_id)
for (i = 0; i < 5; i++)
s->response[i] = qemu_get_be32(f);
s->arglen = qemu_get_be32(f);
if (s->mode == SSI_SD_CMDARG &&
(s->arglen < 0 || s->arglen >= ARRAY_SIZE(s->cmdarg))) {
return -EINVAL;
}
s->response_pos = qemu_get_be32(f);
s->stopping = qemu_get_be32(f);
if (s->mode == SSI_SD_RESPONSE &&
(s->response_pos < 0 || s->response_pos >= ARRAY_SIZE(s->response) ||
(!s->stopping && s->arglen > ARRAY_SIZE(s->response)))) {
return -EINVAL;
}
ss->cs = qemu_get_be32(f);


@@ -1070,9 +1070,21 @@ static int tsc210x_load(QEMUFile *f, void *opaque, int version_id)
s->enabled = qemu_get_byte(f);
s->host_mode = qemu_get_byte(f);
s->function = qemu_get_byte(f);
if (s->function < 0 || s->function >= ARRAY_SIZE(mode_regs)) {
return -EINVAL;
}
s->nextfunction = qemu_get_byte(f);
if (s->nextfunction < 0 || s->nextfunction >= ARRAY_SIZE(mode_regs)) {
return -EINVAL;
}
s->precision = qemu_get_byte(f);
if (s->precision < 0 || s->precision >= ARRAY_SIZE(resolution)) {
return -EINVAL;
}
s->nextprecision = qemu_get_byte(f);
if (s->nextprecision < 0 || s->nextprecision >= ARRAY_SIZE(resolution)) {
return -EINVAL;
}
s->filter = qemu_get_byte(f);
s->pin_func = qemu_get_byte(f);
s->ref = qemu_get_byte(f);


@@ -46,6 +46,12 @@ static int usb_device_post_load(void *opaque, int version_id)
} else {
dev->attached = 1;
}
if (dev->setup_index < 0 ||
dev->setup_len < 0 ||
dev->setup_index > dev->setup_len ||
dev->setup_len > sizeof(dev->data_buf)) {
return -EINVAL;
}
return 0;
}


@@ -236,7 +236,7 @@ static const USBDescDevice desc_device_tablet2 = {
.bNumInterfaces = 1,
.bConfigurationValue = 1,
.iConfiguration = STR_CONFIG_TABLET,
.bmAttributes = 0xa0,
.bmAttributes = 0x80,
.bMaxPower = 50,
.nif = 1,
.ifs = &desc_iface_tablet2,


@@ -1985,6 +1985,10 @@ static int usbredir_post_load(void *priv, int version_id)
{
USBRedirDevice *dev = priv;
if (dev->parser == NULL) {
return 0;
}
switch (dev->device_info.speed) {
case usb_redir_speed_low:
dev->dev.speed = USB_SPEED_LOW;


@@ -1643,6 +1643,11 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
uint8_t *d;
uint32_t v, addr1, addr;
vga_draw_line_func *vga_draw_line;
#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
static const bool byteswap = false;
#else
static const bool byteswap = true;
#endif
full_update |= update_basic_params(s);
@@ -1685,18 +1690,11 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
disp_width != s->last_width ||
height != s->last_height ||
s->last_depth != depth) {
#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
if (depth == 16 || depth == 32) {
#else
if (depth == 32) {
#endif
if (depth == 32 || (depth == 16 && !byteswap)) {
qemu_free_displaysurface(s->ds);
s->ds->surface = qemu_create_displaysurface_from(disp_width, height, depth,
s->line_offset,
s->vram_ptr + (s->start_addr * 4));
#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
s->ds->surface->pf = qemu_different_endianness_pixelformat(depth);
#endif
s->vram_ptr + (s->start_addr * 4), byteswap);
dpy_gfx_resize(s->ds);
} else {
qemu_console_resize(s->ds, disp_width, height);
@@ -1715,7 +1713,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
s->ds->surface = qemu_create_displaysurface_from(disp_width,
height, depth,
s->line_offset,
s->vram_ptr + (s->start_addr * 4));
s->vram_ptr + (s->start_addr * 4), byteswap);
dpy_gfx_setdata(s->ds);
}


@@ -291,7 +291,7 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
dev->actual = le32_to_cpu(config.actual);
if (dev->actual != oldactual) {
qemu_balloon_changed(ram_size -
(dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
}
}
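
The balloon fix widens dev->actual to ram_addr_t before shifting by VIRTIO_BALLOON_PFN_SHIFT, because a 32-bit page count shifted left by the page-size exponent can exceed 32 bits. The difference in one compilable example, with uint64_t standing in for ram_addr_t and 4 KiB pages assumed:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PFN_SHIFT 12                            /* 4 KiB balloon pages assumed */

    int main(void)
    {
        uint32_t actual = 2 * 1024 * 1024;          /* 2M ballooned pages = 8 GiB */

        uint32_t narrow = actual << PFN_SHIFT;             /* computed and truncated in 32 bits */
        uint64_t wide   = (uint64_t)actual << PFN_SHIFT;   /* what the fix computes */

        printf("narrow = %" PRIu32 " bytes\n", narrow);    /* 0: the high bits are lost */
        printf("wide   = %" PRIu64 " bytes\n", wide);      /* 8589934592 */
        return 0;
    }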


@@ -35,7 +35,9 @@ typedef struct VirtIOBlock
BlockConf *conf;
VirtIOBlkConf *blk;
unsigned short sector_mask;
bool original_wce;
DeviceState *qdev;
VMChangeStateEntry *change;
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
VirtIOBlockDataPlane *dataplane;
#endif
@@ -478,9 +480,9 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
static void virtio_blk_reset(VirtIODevice *vdev)
{
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
VirtIOBlock *s = to_virtio_blk(vdev);
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
if (s->dataplane) {
virtio_blk_data_plane_stop(s->dataplane);
}
@@ -491,6 +493,7 @@ static void virtio_blk_reset(VirtIODevice *vdev)
* are per-device request lists.
*/
bdrv_drain_all();
bdrv_set_enable_write_cache(s->bs, s->original_wce);
}
/* coalesce internal state, copy to pci i/o region 0
@@ -582,7 +585,25 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
}
features = vdev->guest_features;
bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
/* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
* cache flushes. Thus, the "auto writethrough" behavior is never
* necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
* Leaving it enabled would break the following sequence:
*
* Guest started with "-drive cache=writethrough"
* Guest sets status to 0
* Guest sets DRIVER bit in status field
* Guest reads host features (WCE=0, CONFIG_WCE=1)
* Guest writes guest features (WCE=0, CONFIG_WCE=1)
* Guest writes 1 to the WCE configuration field (writeback mode)
* Guest sets DRIVER_OK bit in status field
*
* s->bs would erroneously be placed in writethrough mode.
*/
if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
}
}
static void virtio_blk_save(QEMUFile *f, void *opaque)
@@ -662,6 +683,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
sizeof(struct virtio_blk_config),
sizeof(VirtIOBlock));
s->original_wce = bdrv_enable_write_cache(blk->conf.bs);
s->vdev.get_config = virtio_blk_update_config;
s->vdev.set_config = virtio_blk_set_config;
s->vdev.get_features = virtio_blk_get_features;
@@ -681,7 +703,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
}
#endif
qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
s->qdev = dev;
register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
virtio_blk_save, virtio_blk_load, s);
@@ -702,6 +724,7 @@ void virtio_blk_exit(VirtIODevice *vdev)
virtio_blk_data_plane_destroy(s->dataplane);
s->dataplane = NULL;
#endif
qemu_del_vm_change_state_handler(s->change);
unregister_savevm(s->qdev, "virtio-blk", s);
blockdev_mark_auto_del(s->bs);
virtio_cleanup(vdev);


@@ -20,6 +20,14 @@ typedef struct VirtConsole {
CharDriverState *chr;
} VirtConsole;
void virtio_console_print_early(VirtIODevice *vdev, uint8_t *buf)
{
VirtIOSerial *vser = (void*)vdev;
VirtIOSerialPort *port = find_port_by_id(vser, 0);
VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
qemu_chr_fe_write(vcon->chr, buf, strlen((char*)buf));
}
/* Callback function that's called when the guest sends us data */
static ssize_t flush_buf(VirtIOSerialPort *port, const uint8_t *buf, size_t len)


@@ -44,7 +44,7 @@ typedef struct VirtIONet
VirtIODevice vdev;
uint8_t mac[ETH_ALEN];
uint16_t status;
VirtIONetQueue vqs[MAX_QUEUE_NUM];
VirtIONetQueue *vqs;
VirtQueue *ctrl_vq;
NICState *nic;
uint32_t tx_timeout;
@@ -62,8 +62,8 @@ typedef struct VirtIONet
uint8_t nobcast;
uint8_t vhost_started;
struct {
int in_use;
int first_multi;
uint32_t in_use;
uint32_t first_multi;
uint8_t multi_overflow;
uint8_t uni_overflow;
uint8_t *macs;
@@ -538,7 +538,7 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
return VIRTIO_NET_ERR;
}
if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
if (mac_data.entries <= MAC_TABLE_ENTRIES - n->mac_table.in_use) {
s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
mac_data.entries * ETH_ALEN);
if (s != mac_data.entries * ETH_ALEN) {
@@ -1188,10 +1188,17 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
qemu_get_buffer(f, n->mac_table.macs,
n->mac_table.in_use * ETH_ALEN);
} else if (n->mac_table.in_use) {
uint8_t *buf = g_malloc0(n->mac_table.in_use);
qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
g_free(buf);
} else {
int64_t i;
/* Overflow detected - can happen if source has a larger MAC table.
* We simply set overflow flag so there's no need to maintain the
* table of addresses, discard them all.
* Note: 64 bit math to avoid integer overflow.
*/
for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
qemu_get_byte(f);
}
n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
n->mac_table.in_use = 0;
}
@@ -1242,6 +1249,11 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
}
n->curr_queues = qemu_get_be16(f);
if (n->curr_queues > n->max_queues) {
error_report("virtio-net: curr_queues %x > max_queues %x",
n->curr_queues, n->max_queues);
return -1;
}
for (i = 1; i < n->curr_queues; i++) {
n->vqs[i].tx_waiting = qemu_get_be32(f);
}
@@ -1326,8 +1338,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
n->vdev.set_status = virtio_net_set_status;
n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
n->max_queues = MAX(conf->queues, 1);
n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
n->max_queues = conf->queues;
n->curr_queues = 1;
n->vqs[0].n = n;
n->tx_timeout = net->txtimer;
@@ -1412,6 +1425,7 @@ void virtio_net_exit(VirtIODevice *vdev)
}
}
g_free(n->vqs);
qemu_del_nic(n->nic);
virtio_cleanup(&n->vdev);
}
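
The MAC-table capacity check above is rewritten because in_use + entries can wrap around when the guest supplies a huge entries count, whereas entries <= MAC_TABLE_ENTRIES - in_use cannot, given that in_use never exceeds MAC_TABLE_ENTRIES (the same reasoning behind the 64-bit math in the discard loop of the load path). A short comparison of the two forms with made-up values:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAC_TABLE_ENTRIES 64

    /* Unsafe form: the addition can wrap and let an oversized request through. */
    static bool fits_unsafe(uint32_t in_use, uint32_t entries)
    {
        return in_use + entries <= MAC_TABLE_ENTRIES;
    }

    /* Safe form: no addition, so no wrap (assumes in_use <= MAC_TABLE_ENTRIES). */
    static bool fits_safe(uint32_t in_use, uint32_t entries)
    {
        return entries <= MAC_TABLE_ENTRIES - in_use;
    }

    int main(void)
    {
        uint32_t in_use = 10;
        uint32_t entries = UINT32_MAX - 5;          /* hostile guest-controlled count */

        printf("unsafe: %d  safe: %d\n", fits_unsafe(in_use, entries),
               fits_safe(in_use, entries));
        return 0;
    }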


@@ -267,6 +267,15 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
qemu_get_be32s(f, &n);
assert(n < s->conf->num_queues);
qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
/* TODO: add a way for SCSIBusInfo's load_request to fail,
* and fail migration instead of asserting here.
* When we do, we might be able to re-enable NDEBUG below.
*/
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg));
assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg));
virtio_scsi_parse_req(s, s->cmd_vqs[n], req);
scsi_req_ref(sreq);


@@ -66,7 +66,7 @@ struct VirtIOSerial {
struct VirtIOSerialPostLoad *post_load;
};
static VirtIOSerialPort *find_port_by_id(VirtIOSerial *vser, uint32_t id)
VirtIOSerialPort *find_port_by_id(VirtIOSerial *vser, uint32_t id)
{
VirtIOSerialPort *port;


@@ -205,4 +205,7 @@ size_t virtio_serial_guest_ready(VirtIOSerialPort *port);
*/
void virtio_serial_throttle_port(VirtIOSerialPort *port, bool throttle);
void virtio_console_print_early(VirtIODevice *vdev, uint8_t *buf);
VirtIOSerialPort *find_port_by_id(VirtIOSerial *vser, uint32_t id);
#endif


@@ -423,6 +423,12 @@ void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
unsigned int i;
hwaddr len;
if (num_sg >= VIRTQUEUE_MAX_SIZE) {
error_report("virtio: map attempt out of bounds: %zd > %d",
num_sg, VIRTQUEUE_MAX_SIZE);
exit(1);
}
for (i = 0; i < num_sg; i++) {
len = sg[i].iov_len;
sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
@@ -561,10 +567,11 @@ uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
uint8_t val;
vdev->get_config(vdev, vdev->config);
if (addr > (vdev->config_len - sizeof(val)))
if (addr + sizeof(val) > vdev->config_len) {
return (uint32_t)-1;
}
vdev->get_config(vdev, vdev->config);
val = ldub_p(vdev->config + addr);
return val;
@@ -574,10 +581,11 @@ uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
uint16_t val;
vdev->get_config(vdev, vdev->config);
if (addr > (vdev->config_len - sizeof(val)))
if (addr + sizeof(val) > vdev->config_len) {
return (uint32_t)-1;
}
vdev->get_config(vdev, vdev->config);
val = lduw_p(vdev->config + addr);
return val;
@@ -587,10 +595,11 @@ uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
uint32_t val;
vdev->get_config(vdev, vdev->config);
if (addr > (vdev->config_len - sizeof(val)))
if (addr + sizeof(val) > vdev->config_len) {
return (uint32_t)-1;
}
vdev->get_config(vdev, vdev->config);
val = ldl_p(vdev->config + addr);
return val;
@@ -600,8 +609,9 @@ void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
uint8_t val = data;
if (addr > (vdev->config_len - sizeof(val)))
if (addr + sizeof(val) > vdev->config_len) {
return;
}
stb_p(vdev->config + addr, val);
@@ -613,8 +623,9 @@ void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
uint16_t val = data;
if (addr > (vdev->config_len - sizeof(val)))
if (addr + sizeof(val) > vdev->config_len) {
return;
}
stw_p(vdev->config + addr, val);
@@ -626,8 +637,9 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
uint32_t val = data;
if (addr > (vdev->config_len - sizeof(val)))
if (addr + sizeof(val) > vdev->config_len) {
return;
}
stl_p(vdev->config + addr, val);
@@ -824,7 +836,9 @@ int virtio_set_features(VirtIODevice *vdev, uint32_t val)
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
int num, i, ret;
int i, ret;
int32_t config_len;
uint32_t num;
uint32_t features;
uint32_t supported_features;
@@ -837,6 +851,9 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
qemu_get_8s(f, &vdev->status);
qemu_get_8s(f, &vdev->isr);
qemu_get_be16s(f, &vdev->queue_sel);
if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) {
return -1;
}
qemu_get_be32s(f, &features);
if (virtio_set_features(vdev, features) < 0) {
@@ -845,11 +862,21 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
features, supported_features);
return -1;
}
vdev->config_len = qemu_get_be32(f);
config_len = qemu_get_be32(f);
if (config_len != vdev->config_len) {
error_report("Unexpected config length 0x%x. Expected 0x%zx",
config_len, vdev->config_len);
return -1;
}
qemu_get_buffer(f, vdev->config, vdev->config_len);
num = qemu_get_be32(f);
if (num > VIRTIO_PCI_QUEUE_MAX) {
error_report("Invalid number of PCI queues: 0x%x", num);
return -1;
}
for (i = 0; i < num; i++) {
vdev->vq[i].vring.num = qemu_get_be32(f);
vdev->vq[i].pa = qemu_get_be64(f);
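
The config accessors switch from addr > config_len - sizeof(val) to addr + sizeof(val) > config_len, and only call get_config once the access is known to be in bounds. With unsigned sizes, the subtraction form wraps around whenever config_len is smaller than the access width and then accepts any address; the addition form stays correct because addr here is a small transport-limited offset. A standalone demonstration of the two checks:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Old form: wraps when config_len < width, so every address looks in bounds. */
    static bool in_bounds_old(size_t addr, size_t width, size_t config_len)
    {
        return !(addr > config_len - width);
    }

    /* New form: safe as long as addr + width cannot overflow size_t, which holds
     * for the small config-space offsets used by the callers. */
    static bool in_bounds_new(size_t addr, size_t width, size_t config_len)
    {
        return !(addr + width > config_len);
    }

    int main(void)
    {
        /* A device with a 2-byte config space; the guest reads 4 bytes at offset 0. */
        printf("old: %d  new: %d\n",
               in_bounds_old(0, 4, 2),      /* 2 - 4 wraps to SIZE_MAX - 1: "in bounds" */
               in_bounds_new(0, 4, 2));     /* 0 + 4 > 2: correctly rejected */
        return 0;
    }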


@@ -1074,7 +1074,7 @@ static void vmsvga_screen_dump(void *opaque, const char *filename, bool cswitch,
ds_get_height(s->vga.ds),
32,
ds_get_linesize(s->vga.ds),
s->vga.vram_ptr);
s->vga.vram_ptr, false);
ppm_save(filename, ds, errp);
g_free(ds);
}


@@ -362,7 +362,7 @@ static int vt82c686b_pm_initfn(PCIDevice *dev)
acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_cnt_init(&s->ar, &s->io);
acpi_pm1_cnt_init(&s->ar, &s->io, 2);
return 0;
}


@@ -756,7 +756,8 @@ static void xenfb_update(void *opaque)
qemu_free_displaysurface(xenfb->c.ds);
xenfb->c.ds->surface = qemu_create_displaysurface_from
(xenfb->width, xenfb->height, xenfb->depth,
xenfb->row_stride, xenfb->pixels + xenfb->offset);
xenfb->row_stride, xenfb->pixels + xenfb->offset,
false);
break;
default:
/* we must convert stuff */


@@ -198,6 +198,15 @@ static bool is_version_0 (void *opaque, int version_id)
return version_id == 0;
}
static bool vmstate_scoop_validate(void *opaque, int version_id)
{
ScoopInfo *s = opaque;
return !(s->prev_level & 0xffff0000) &&
!(s->gpio_level & 0xffff0000) &&
!(s->gpio_dir & 0xffff0000);
}
static const VMStateDescription vmstate_scoop_regs = {
.name = "scoop",
.version_id = 1,
@@ -210,6 +219,7 @@ static const VMStateDescription vmstate_scoop_regs = {
VMSTATE_UINT32(gpio_level, ScoopInfo),
VMSTATE_UINT32(gpio_dir, ScoopInfo),
VMSTATE_UINT32(prev_level, ScoopInfo),
VMSTATE_VALIDATE("irq levels are 16 bit", vmstate_scoop_validate),
VMSTATE_UINT16(mcr, ScoopInfo),
VMSTATE_UINT16(cdr, ScoopInfo),
VMSTATE_UINT16(ccr, ScoopInfo),


@@ -496,6 +496,13 @@ typedef struct RAMBlock {
#endif
} RAMBlock;
static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
assert(offset < block->length);
assert(block->host);
return (char *)block->host + offset;
}
typedef struct RAMList {
QemuMutex mutex;
/* Protected by the iothread lock. */


@@ -161,6 +161,7 @@ typedef struct CPUWatchpoint {
uint32_t halted; /* Nonzero if the CPU is in suspend state */ \
uint32_t interrupt_request; \
volatile sig_atomic_t exit_request; \
volatile sig_atomic_t tcg_exit_req; \
CPU_COMMON_TLB \
struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; \
/* buffer for temporaries in the code generator */ \


@@ -7,10 +7,18 @@
static TCGArg *icount_arg;
static int icount_label;
static int exitreq_label;
static inline void gen_icount_start(void)
{
TCGv_i32 count;
TCGv_i32 flag;
exitreq_label = gen_new_label();
flag = tcg_temp_local_new_i32();
tcg_gen_ld_i32(flag, cpu_env, offsetof(CPUArchState, tcg_exit_req));
tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label);
tcg_temp_free_i32(flag);
if (!use_icount)
return;
@@ -29,10 +37,13 @@ static inline void gen_icount_start(void)
static void gen_icount_end(TranslationBlock *tb, int num_insns)
{
gen_set_label(exitreq_label);
tcg_gen_exit_tb((tcg_target_long)tb + TB_EXIT_REQUESTED);
if (use_icount) {
*icount_arg = num_insns;
gen_set_label(icount_label);
tcg_gen_exit_tb((tcg_target_long)tb + 2);
tcg_gen_exit_tb((tcg_target_long)tb + TB_EXIT_ICOUNT_EXPIRED);
}
}


@@ -103,6 +103,8 @@ extern SaveVMHandlers savevm_ram_handlers;
uint64_t dup_mig_bytes_transferred(void);
uint64_t dup_mig_pages_transferred(void);
uint64_t skipped_mig_bytes_transferred(void);
uint64_t skipped_mig_pages_transferred(void);
uint64_t norm_mig_bytes_transferred(void);
uint64_t norm_mig_pages_transferred(void);
uint64_t xbzrle_mig_bytes_transferred(void);


@@ -83,6 +83,7 @@ enum VMStateFlags {
VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */
VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/
VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/
VMS_MUST_EXIST = 0x1000, /* Field must exist in input */
};
typedef struct {
@@ -174,6 +175,14 @@ extern const VMStateInfo vmstate_info_bitmap;
.offset = vmstate_offset_value(_state, _field, _type), \
}
/* Validate state using a boolean predicate. */
#define VMSTATE_VALIDATE(_name, _test) { \
.name = (_name), \
.field_exists = (_test), \
.flags = VMS_ARRAY | VMS_MUST_EXIST, \
.num = 0, /* 0 elements: no data, only run _test */ \
}
#define VMSTATE_POINTER(_field, _state, _version, _info, _type) { \
.name = (stringify(_field)), \
.version_id = (_version), \
@@ -502,7 +511,7 @@ extern const VMStateInfo vmstate_info_bitmap;
#define VMSTATE_UINT32_EQUAL(_f, _s) \
VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint32_equal, uint32_t)
#define VMSTATE_INT32_LE(_f, _s) \
#define VMSTATE_INT32_POSITIVE_LE(_f, _s) \
VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t)
#define VMSTATE_UINT8_TEST(_f, _s, _t) \
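
VMSTATE_VALIDATE reuses the field_exists hook as a predicate over zero elements: no data is read, but the test still runs and a false result fails the incoming migration, which is how the scoop hunk above rejects GPIO state with bits set above the low 16. A sketch of that same predicate written as an ordinary post-load check, outside the VMState machinery:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct scoop_like {
        uint32_t gpio_level;
        uint32_t gpio_dir;
        uint32_t prev_level;
    };

    /* Equivalent of vmstate_scoop_validate: the device only has 16 GPIO lines,
     * so any high bits in the incoming state are invalid. */
    static bool scoop_state_valid(const struct scoop_like *s)
    {
        return !(s->prev_level & 0xffff0000) &&
               !(s->gpio_level & 0xffff0000) &&
               !(s->gpio_dir & 0xffff0000);
    }

    static int scoop_post_load(const struct scoop_like *s)
    {
        return scoop_state_valid(s) ? 0 : -1;   /* a loader would fail the migration */
    }

    int main(void)
    {
        struct scoop_like ok  = { 0x00ff, 0x0001, 0x0000 };
        struct scoop_like bad = { 0x10000, 0x0000, 0x0000 };

        printf("ok: %d  bad: %d\n", scoop_post_load(&ok), scoop_post_load(&bad));
        return 0;
    }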


@@ -72,7 +72,7 @@ struct NetClientState {
};
typedef struct NICState {
NetClientState ncs[MAX_QUEUE_NUM];
NetClientState *ncs;
NICConf *conf;
void *opaque;
bool peer_deleted;


@@ -430,4 +430,41 @@ int64_t pow2floor(int64_t value);
int uleb128_encode_small(uint8_t *out, uint32_t n);
int uleb128_decode_small(const uint8_t *in, uint32_t *n);
/*
* Hexdump a buffer to a file. An optional string prefix is added to every line
*/
void hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
/* vector definitions */
#ifdef __ALTIVEC__
#include <altivec.h>
#define VECTYPE vector unsigned char
#define SPLAT(p) vec_splat(vec_ld(0, p), 0)
#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
/* altivec.h may redefine the bool macro as vector type.
* Reset it to POSIX semantics. */
#undef bool
#define bool _Bool
#elif defined __SSE2__
#include <emmintrin.h>
#define VECTYPE __m128i
#define SPLAT(p) _mm_set1_epi8(*(p))
#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
#else
#define VECTYPE unsigned long
#define SPLAT(p) (*(p) * (~0UL / 255))
#define ALL_EQ(v1, v2) ((v1) == (v2))
#endif
#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
static inline bool
can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
{
return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
* sizeof(VECTYPE)) == 0
&& ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
}
size_t buffer_find_nonzero_offset(const void *buf, size_t len);
#endif
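
can_use_buffer_find_nonzero_offset() gates the vectorized scan on the two preconditions visible above: the length must be a multiple of eight vector widths and the pointer must be vector-aligned. A hedged sketch of how a caller checks those preconditions, with a plain byte loop as the fallback (the fallback is illustrative; only the predicate mirrors the hunk, using the portable unsigned-long branch):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VECTYPE unsigned long               /* portable #else branch of the hunk */
    #define UNROLL  8                           /* BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR */

    static int can_use_fast_scan(const void *buf, size_t len)
    {
        return len % (UNROLL * sizeof(VECTYPE)) == 0 &&
               (uintptr_t)buf % sizeof(VECTYPE) == 0;
    }

    /* Scalar fallback: offset of the first non-zero byte, or len if all zero. */
    static size_t scan_nonzero_slow(const uint8_t *buf, size_t len)
    {
        size_t i;
        for (i = 0; i < len && buf[i] == 0; i++) {
            /* nothing */
        }
        return i;
    }

    int main(void)
    {
        static _Alignas(sizeof(VECTYPE)) uint8_t page[4096];   /* zero-filled, aligned */
        page[100] = 1;

        if (can_use_fast_scan(page, sizeof(page))) {
            printf("fast path allowed (would call buffer_find_nonzero_offset)\n");
        }
        printf("first non-zero byte at offset %zu\n", scan_nonzero_slow(page, sizeof(page)));
        return 0;
    }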

Some files were not shown because too many files have changed in this diff.