tests/qtest: Add a test for migration with direct-io and multifd

Signed-off-by: Fabiano Rosas <farosas@suse.de>
migration: Add direct-io parameter
2023-10-20 10:32:56 -03:00 · 2023-10-20 10:32:56 -03:00 · 2023-10-20 10:32:56 -03:00 · 2023-10-20 10:32:56 -03:00 · 2023-10-20 10:32:54 -03:00 · 2023-10-20 10:32:39 -03:00
32 changed files with 1581 additions and 233 deletions
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -566,6 +566,20 @@ Others (especially either older devices or system devices which for
 some reason don't have a bus concept) make use of the ``instance id``
 for otherwise identically named devices.

+Fixed-ram format
+----------------
+
+When the ``fixed-ram`` capability is enabled, a slightly different
+stream format is used for the RAM section. Instead of having a
+sequential stream of pages that follow the RAMBlock headers, the dirty
+pages for a RAMBlock follow its header. This ensures that each RAM
+page has a fixed offset in the resulting migration stream.
+
+The ``fixed-ram`` capability can be enabled in both source and
+destination with:
+
+    ``migrate_set_capability fixed-ram on``
+
 Return path
 -----------

--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -44,6 +44,14 @@ struct RAMBlock {
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
+    /* shadow dirty bitmap used when migrating to a file */
+    unsigned long *shadow_bmap;
+    /*
+     * offsets in the file where the bitmap and the pages belonging
+     * to this ramblock are saved, used only during migration to a file.
+     */
+    off_t bitmap_offset;
+    uint64_t pages_offset;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -33,8 +33,10 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 #define QIO_CHANNEL_ERR_BLOCK -2

 #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+#define QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET 0x2

 #define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+#define QIO_CHANNEL_READ_FLAG_WITH_OFFSET 0x2

 typedef enum QIOChannelFeature QIOChannelFeature;

@@ -44,6 +46,7 @@ enum QIOChannelFeature {
    QIO_CHANNEL_FEATURE_LISTEN,
    QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
+    QIO_CHANNEL_FEATURE_SEEKABLE,
 };


@@ -130,6 +133,16 @@ struct QIOChannelClass {
                           Error **errp);

    /* Optional callbacks */
+    ssize_t (*io_pwritev)(QIOChannel *ioc,
+                          const struct iovec *iov,
+                          size_t niov,
+                          off_t offset,
+                          Error **errp);
+    ssize_t (*io_preadv)(QIOChannel *ioc,
+                         const struct iovec *iov,
+                         size_t niov,
+                         off_t offset,
+                         Error **errp);
    int (*io_shutdown)(QIOChannel *ioc,
                       QIOChannelShutdown how,
                       Error **errp);
@@ -528,6 +541,126 @@ void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled);
 int qio_channel_close(QIOChannel *ioc,
                      Error **errp);

+/**
+ * qio_channel_pwritev_full
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @offset: offset in the channel where writes should begin
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Not all implementations will support this facility, so may report
+ * an error. To avoid errors, the caller may check for the feature
+ * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method.
+ *
+ * Behaves as qio_channel_writev_full, apart from not supporting
+ * sending of file handles as well as beginning the write at the
+ * passed @offset
+ *
+ */
+ssize_t qio_channel_pwritev_full(QIOChannel *ioc, const struct iovec *iov,
+                                 size_t niov, off_t offset, Error **errp);
+
+/**
+ * qio_channel_write_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @offset: the iovec offset in the file where to write the data
+ * @fds: an array of file handles to send
+ * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Selects between a writev or pwritev channel writer function.
+ *
+ * If QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET is passed in flags, pwritev is
+ * used and @offset is expected to be a meaningful value, @fds and
+ * @nfds are ignored; otherwise uses writev and @offset is ignored.
+ *
+ * Returns: 0 if all bytes were written, or -1 on error
+ */
+int qio_channel_write_full_all(QIOChannel *ioc, const struct iovec *iov,
+                               size_t niov, off_t offset, int *fds, size_t nfds,
+                               int flags, Error **errp);
+
+/**
+ * qio_channel_pwritev
+ * @ioc: the channel object
+ * @buf: the memory region to write data from
+ * @buflen: the number of bytes in @buf
+ * @offset: offset in the channel where writes should begin
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Not all implementations will support this facility, so may report
+ * an error. To avoid errors, the caller may check for the feature
+ * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method.
+ *
+ */
+ssize_t qio_channel_pwritev(QIOChannel *ioc, char *buf, size_t buflen,
+                            off_t offset, Error **errp);
+
+/**
+ * qio_channel_preadv_full
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data into
+ * @niov: the length of the @iov array
+ * @offset: offset in the channel where reads should begin
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Not all implementations will support this facility, so may report
+ * an error.  To avoid errors, the caller may check for the feature
+ * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method.
+ *
+ * Behaves as qio_channel_readv_full, apart from not supporting
+ * receiving of file handles as well as beginning the read at the
+ * passed @offset
+ *
+ */
+ssize_t qio_channel_preadv_full(QIOChannel *ioc, const struct iovec *iov,
+                                size_t niov, off_t offset, Error **errp);
+
+/**
+ * qio_channel_read_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @offset: the iovec offset in the file from where to read the data
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Selects between a readv or preadv channel reader function.
+ *
+ * If QIO_CHANNEL_READ_FLAG_WITH_OFFSET is passed in flags, preadv is
+ * used and @offset is expected to be a meaningful value; otherwise
+ * readv is used and @offset is ignored.
+ *
+ * Unlike qio_channel_write_full_all, this function has no @fds or
+ * @nfds arguments, so file handles cannot be received through it.
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+int qio_channel_read_full_all(QIOChannel *ioc, const struct iovec *iov,
+                              size_t niov, off_t offset,
+                              int flags, Error **errp);
+
+/**
+ * qio_channel_preadv
+ * @ioc: the channel object
+ * @buf: the memory region to read data into
+ * @buflen: the number of bytes in @buf
+ * @offset: offset in the channel where reads should begin
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Not all implementations will support this facility, so may report
+ * an error.  To avoid errors, the caller may check for the feature
+ * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method.
+ *
+ */
+ssize_t qio_channel_preadv(QIOChannel *ioc, char *buf, size_t buflen,
+                           off_t offset, Error **errp);
+
 /**
 * qio_channel_shutdown:
 * @ioc: the channel object
--- a/include/migration/global_state.h
+++ b/include/migration/global_state.h
@@ -16,7 +16,8 @@
 #include "qapi/qapi-types-run-state.h"

 void register_global_state(void);
-void global_state_store(void);
+RunState global_state_store(void);
+RunState global_state_store_once(void);
 void global_state_store_running(void);
 bool global_state_received(void);
 RunState global_state_get_runstate(void);
--- a/include/migration/qemu-file-types.h
+++ b/include/migration/qemu-file-types.h
@@ -50,6 +50,8 @@ unsigned int qemu_get_be16(QEMUFile *f);
 unsigned int qemu_get_be32(QEMUFile *f);
 uint64_t qemu_get_be64(QEMUFile *f);

+bool qemu_file_is_seekable(QEMUFile *f);
+
 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
 {
    qemu_put_be64(f, *pv);
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -67,6 +67,19 @@ static inline void clear_bit(long nr, unsigned long *addr)
    *p &= ~mask;
 }

+/**
+ * clear_bit_atomic - Clears a bit in memory atomically
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ */
+static inline void clear_bit_atomic(long nr, unsigned long *addr)
+{
+    unsigned long mask = BIT_MASK(nr);
+    unsigned long *p = addr + BIT_WORD(nr);
+
+    /* This function returns void, so the expression is not "returned". */
+    qatomic_and(p, ~mask);
+}
+
 /**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -597,6 +597,8 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
 bool qemu_has_ofd_lock(void);
 #endif

+bool qemu_has_direct_io(void);
+
 #if defined(__HAIKU__) && defined(__i386__)
 #define FMT_pid "%ld"
 #elif defined(WIN64)
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -36,6 +36,10 @@ qio_channel_file_new_fd(int fd)

    ioc->fd = fd;

+    if (lseek(fd, 0, SEEK_CUR) != (off_t)-1) {
+        qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SEEKABLE);
+    }
+
    trace_qio_channel_file_new_fd(ioc, fd);

    return ioc;
@@ -60,6 +64,10 @@ qio_channel_file_new_path(const char *path,
        return NULL;
    }

+    if (lseek(ioc->fd, 0, SEEK_CUR) != (off_t)-1) {
+        qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SEEKABLE);
+    }
+
    trace_qio_channel_file_new_path(ioc, path, flags, mode, ioc->fd);

    return ioc;
@@ -138,6 +146,56 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
    return ret;
 }

+/*
+ * Positioned vectored read: fill @iov from the channel's file
+ * descriptor starting at @offset, restarting the call when it is
+ * interrupted (EINTR).
+ *
+ * Returns the number of bytes read, QIO_CHANNEL_ERR_BLOCK if the call
+ * fails with EAGAIN, or -1 with @errp set on any other failure.
+ */
+static ssize_t qio_channel_file_preadv(QIOChannel *ioc,
+                                       const struct iovec *iov,
+                                       size_t niov,
+                                       off_t offset,
+                                       Error **errp)
+{
+    QIOChannelFile *file_chan = QIO_CHANNEL_FILE(ioc);
+    ssize_t nread;
+
+    do {
+        nread = preadv(file_chan->fd, iov, niov, offset);
+    } while (nread < 0 && errno == EINTR);
+
+    if (nread >= 0) {
+        return nread;
+    }
+
+    if (errno == EAGAIN) {
+        return QIO_CHANNEL_ERR_BLOCK;
+    }
+
+    error_setg_errno(errp, errno, "Unable to read from file");
+    return -1;
+}
+
+/*
+ * Positioned vectored write: write @iov to the channel's file
+ * descriptor starting at @offset, restarting the call when it is
+ * interrupted (EINTR).
+ *
+ * Returns the number of bytes written, QIO_CHANNEL_ERR_BLOCK if the
+ * call fails with EAGAIN, or -1 with @errp set on any other failure.
+ */
+static ssize_t qio_channel_file_pwritev(QIOChannel *ioc,
+                                        const struct iovec *iov,
+                                        size_t niov,
+                                        off_t offset,
+                                        Error **errp)
+{
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
+    ssize_t ret;
+
+ retry:
+    ret = pwritev(fioc->fd, iov, niov, offset);
+    /*
+     * errno is only meaningful when pwritev() fails. A zero return is
+     * not a failure and must not be judged by a stale errno, which
+     * could otherwise trigger a spurious retry or a bogus error.
+     */
+    if (ret < 0) {
+        if (errno == EAGAIN) {
+            return QIO_CHANNEL_ERR_BLOCK;
+        }
+        if (errno == EINTR) {
+            goto retry;
+        }
+        error_setg_errno(errp, errno, "Unable to write to file");
+        return -1;
+    }
+    return ret;
+}
+
 static int qio_channel_file_set_blocking(QIOChannel *ioc,
                                         bool enabled,
                                         Error **errp)
@@ -223,6 +281,8 @@ static void qio_channel_file_class_init(ObjectClass *klass,
    ioc_klass->io_writev = qio_channel_file_writev;
    ioc_klass->io_readv = qio_channel_file_readv;
    ioc_klass->io_set_blocking = qio_channel_file_set_blocking;
+    ioc_klass->io_pwritev = qio_channel_file_pwritev;
+    ioc_klass->io_preadv = qio_channel_file_preadv;
    ioc_klass->io_seek = qio_channel_file_seek;
    ioc_klass->io_close = qio_channel_file_close;
    ioc_klass->io_create_watch = qio_channel_file_create_watch;
--- a/io/channel.c
+++ b/io/channel.c
@@ -454,6 +454,146 @@ GSource *qio_channel_add_watch_source(QIOChannel *ioc,
 }


+/*
+ * Dispatch a positioned vectored write to the channel class.
+ *
+ * Fails with -1 and @errp set when the class has no io_pwritev
+ * callback or when the channel lacks QIO_CHANNEL_FEATURE_SEEKABLE;
+ * otherwise returns whatever the callback returns.
+ */
+ssize_t qio_channel_pwritev_full(QIOChannel *ioc, const struct iovec *iov,
+                                 size_t niov, off_t offset, Error **errp)
+{
+    QIOChannelClass *ops = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (ops->io_pwritev == NULL) {
+        error_setg(errp, "Channel does not support pwritev");
+        return -1;
+    }
+
+    if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_SEEKABLE)) {
+        return ops->io_pwritev(ioc, iov, niov, offset, errp);
+    }
+
+    error_setg_errno(errp, EINVAL, "Requested channel is not seekable");
+    return -1;
+}
+
+/*
+ * Perform positioned reads (or writes, if @is_write) for @iov, issuing
+ * one qio_channel_preadv_full/qio_channel_pwritev_full call per run of
+ * elements that are contiguous in memory.
+ *
+ * @offset is a bias rather than a plain file position: the file
+ * position used for each run is @offset plus the address (iov_base)
+ * of the first element of that run.
+ *
+ * Returns 0 on success, including when @niov is 0, or -1 if one of the
+ * positioned calls fails (@errp is set by that call).
+ *
+ * NOTE(review): only a negative return from the positioned call is
+ * treated as failure; a short transfer is not retried here.
+ */
+static int qio_channel_preadv_pwritev_contiguous(QIOChannel *ioc,
+                                                 const struct iovec *iov,
+                                                 size_t niov, off_t offset,
+                                                 bool is_write, Error **errp)
+{
+    /* Initialized so that an empty @iov does not return garbage. */
+    ssize_t ret = 0;
+    size_t i, slice_idx = 0, slice_num = 1;
+
+    /*
+     * If the iov array doesn't have contiguous elements, we need to
+     * split it in slices because we only have one (file) 'offset' for
+     * the whole iov. Do this here so callers don't need to break the
+     * iov array themselves.
+     */
+    for (i = 0; i < niov; i++, slice_num++) {
+        uint64_t file_offset;
+
+        if (i + 1 < niov) {
+            /* uintptr_t keeps the pointer casts valid on 32-bit hosts. */
+            uintptr_t base = (uintptr_t) iov[i].iov_base;
+            uintptr_t next = (uintptr_t) iov[i + 1].iov_base;
+
+            if (base + iov[i].iov_len == next) {
+                /* Still inside the current run: keep accumulating. */
+                continue;
+            }
+        }
+
+        /*
+         * Use the offset of the first element of the segment that
+         * we're sending.
+         */
+        file_offset = (uint64_t) offset +
+                      (uintptr_t) iov[slice_idx].iov_base;
+
+        if (is_write) {
+            ret = qio_channel_pwritev_full(ioc, &iov[slice_idx], slice_num,
+                                           file_offset, errp);
+        } else {
+            ret = qio_channel_preadv_full(ioc, &iov[slice_idx], slice_num,
+                                          file_offset, errp);
+        }
+
+        if (ret < 0) {
+            break;
+        }
+
+        slice_idx += slice_num;
+        /* The loop increment brings this back to 1 for the next run. */
+        slice_num = 0;
+    }
+
+    return (ret < 0) ? -1 : 0;
+}
+
+/*
+ * Write all of @iov to @ioc.
+ *
+ * With QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET set in @flags the data goes
+ * through the positioned-write path using @offset, and @fds/@nfds are
+ * not used. Otherwise @offset is ignored and @iov, @fds, @nfds and
+ * @flags are handed to qio_channel_writev_full_all().
+ *
+ * Returns the result of whichever path was taken.
+ */
+int qio_channel_write_full_all(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov, off_t offset,
+                               int *fds, size_t nfds,
+                               int flags, Error **errp)
+{
+    if (flags & QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET) {
+        return qio_channel_preadv_pwritev_contiguous(ioc, iov, niov,
+                                                     offset, true, errp);
+    }
+
+    /* Forward the caller's file handles instead of silently dropping them. */
+    return qio_channel_writev_full_all(ioc, iov, niov, fds, nfds, flags, errp);
+}
+
+/*
+ * Single-buffer convenience wrapper: write @buflen bytes from @buf at
+ * @offset by way of qio_channel_pwritev_full().
+ */
+ssize_t qio_channel_pwritev(QIOChannel *ioc, char *buf, size_t buflen,
+                            off_t offset, Error **errp)
+{
+    struct iovec vec;
+
+    vec.iov_base = buf;
+    vec.iov_len = buflen;
+
+    return qio_channel_pwritev_full(ioc, &vec, 1, offset, errp);
+}
+
+/*
+ * Dispatch a positioned vectored read to the channel class.
+ *
+ * Fails with -1 and @errp set when the class has no io_preadv
+ * callback or when the channel lacks QIO_CHANNEL_FEATURE_SEEKABLE;
+ * otherwise returns whatever the callback returns.
+ */
+ssize_t qio_channel_preadv_full(QIOChannel *ioc, const struct iovec *iov,
+                                size_t niov, off_t offset, Error **errp)
+{
+    QIOChannelClass *ops = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (ops->io_preadv == NULL) {
+        error_setg(errp, "Channel does not support preadv");
+        return -1;
+    }
+
+    if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_SEEKABLE)) {
+        return ops->io_preadv(ioc, iov, niov, offset, errp);
+    }
+
+    error_setg_errno(errp, EINVAL, "Requested channel is not seekable");
+    return -1;
+}
+
+/*
+ * Read into all of @iov from @ioc.
+ *
+ * Without QIO_CHANNEL_READ_FLAG_WITH_OFFSET in @flags this is a plain
+ * qio_channel_readv_full_all() call with no file handle arguments and
+ * @offset is ignored; with the flag, the positioned-read path is used.
+ */
+int qio_channel_read_full_all(QIOChannel *ioc, const struct iovec *iov,
+                              size_t niov, off_t offset,
+                              int flags, Error **errp)
+{
+    bool positioned = (flags & QIO_CHANNEL_READ_FLAG_WITH_OFFSET) != 0;
+
+    if (!positioned) {
+        return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
+    }
+
+    return qio_channel_preadv_pwritev_contiguous(ioc, iov, niov,
+                                                 offset, false, errp);
+}
+
+/*
+ * Single-buffer convenience wrapper: read up to @buflen bytes into
+ * @buf from @offset by way of qio_channel_preadv_full().
+ */
+ssize_t qio_channel_preadv(QIOChannel *ioc, char *buf, size_t buflen,
+                           off_t offset, Error **errp)
+{
+    struct iovec vec;
+
+    vec.iov_base = buf;
+    vec.iov_len = buflen;
+
+    return qio_channel_preadv_full(ioc, &vec, 1, offset, errp);
+}
+
 int qio_channel_shutdown(QIOChannel *ioc,
                         QIOChannelShutdown how,
                         Error **errp)
--- a/migration/file.c
+++ b/migration/file.c
@@ -6,17 +6,25 @@
 */

 #include "qemu/osdep.h"
-#include "qemu/cutils.h"
 #include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
 #include "channel.h"
 #include "file.h"
-#include "migration.h"
 #include "io/channel-file.h"
 #include "io/channel-util.h"
+#include "migration.h"
+#include "options.h"
 #include "trace.h"

 #define OFFSET_OPTION ",offset="

+static struct FileOutgoingArgs {
+    char *fname;
+    int flags;
+    int mode;
+} outgoing_args;
+
 /* Remove the offset option from @filespec and return it in @offsetp. */

 static int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp)
@@ -36,13 +44,71 @@ static int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp)
    return 0;
 }

+/*
+ * Worker handed to qio_task_run_in_thread() by
+ * file_send_channel_create(). It intentionally does nothing.
+ */
+static void qio_channel_file_connect_worker(QIOTask *task, gpointer opaque)
+{
+    /* noop */
+}
+
+/*
+ * Move the current migration from MIGRATION_STATUS_SETUP to
+ * MIGRATION_STATUS_FAILED and then cancel it, handing @errp to
+ * migration_cancel().
+ */
+static void file_migration_cancel(Error *errp)
+{
+    MigrationState *ms = migrate_get_current();
+
+    migrate_set_state(&ms->state,
+                      MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
+    migration_cancel(errp);
+}
+
+/*
+ * Tear down a file send channel: close and unref @ioc when it is not
+ * NULL, then release the file name saved in outgoing_args.
+ *
+ * Always returns 0; the result of qio_channel_close() is not checked.
+ */
+int file_send_channel_destroy(QIOChannel *ioc)
+{
+    if (ioc) {
+        qio_channel_close(ioc, NULL);
+        object_unref(OBJECT(ioc));
+    }
+    /* Reset the pointer so a later call does not free it twice. */
+    g_free(outgoing_args.fname);
+    outgoing_args.fname = NULL;
+
+    return 0;
+}
+
+/*
+ * Open another channel on the outgoing migration file and report it
+ * through a QIOTask.
+ *
+ * @f: completion callback given to qio_task_new()
+ * @data: opaque pointer passed to both the task and its worker
+ *
+ * The file name, flags and mode come from outgoing_args, which
+ * file_start_outgoing_migration() fills in. If the file cannot be
+ * opened the migration is cancelled via file_migration_cancel() and
+ * no task is created.
+ */
+void file_send_channel_create(QIOTaskFunc f, void *data)
+{
+    QIOChannelFile *ioc;
+    QIOTask *task;
+    /* NOTE(review): despite the name this is an Error *, not an Error **. */
+    Error *errp = NULL;
+    int flags = outgoing_args.flags;
+
+    if (migrate_direct_io() && qemu_has_direct_io()) {
+        /*
+         * Enable O_DIRECT for the secondary channels. These are used
+         * for sending ram pages and writes should be guaranteed to be
+         * aligned to at least page size.
+         */
+        flags |= O_DIRECT;
+    }
+
+    ioc = qio_channel_file_new_path(outgoing_args.fname, flags,
+                                    outgoing_args.mode, &errp);
+    if (!ioc) {
+        file_migration_cancel(errp);
+        return;
+    }
+
+    /* The worker is a no-op; the task exists to invoke @f with the channel. */
+    task = qio_task_new(OBJECT(ioc), f, (gpointer)data, NULL);
+    qio_task_run_in_thread(task, qio_channel_file_connect_worker,
+                           (gpointer)data, NULL, NULL);
+}
+
 void file_start_outgoing_migration(MigrationState *s, const char *filespec,
                                   Error **errp)
 {
-    g_autofree char *filename = g_strdup(filespec);
    g_autoptr(QIOChannelFile) fioc = NULL;
+    g_autofree char *filename = g_strdup(filespec);
    uint64_t offset = 0;
    QIOChannel *ioc;
+    int flags = O_CREAT | O_TRUNC | O_WRONLY;
+    mode_t mode = 0660;

    trace_migration_file_outgoing(filename);

@@ -50,12 +116,15 @@ void file_start_outgoing_migration(MigrationState *s, const char *filespec,
        return;
    }

-    fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY | O_TRUNC,
-                                     0600, errp);
+    fioc = qio_channel_file_new_path(filename, flags, mode, errp);
    if (!fioc) {
        return;
    }

+    outgoing_args.fname = g_strdup(filename);
+    outgoing_args.flags = flags;
+    outgoing_args.mode = mode;
+
    ioc = QIO_CHANNEL(fioc);
    if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) {
        return;
@@ -78,7 +147,8 @@ void file_start_incoming_migration(const char *filespec, Error **errp)
    g_autofree char *filename = g_strdup(filespec);
    QIOChannelFile *fioc = NULL;
    uint64_t offset = 0;
-    QIOChannel *ioc;
+    int channels = 1;
+    int i = 0, fd;

    trace_migration_file_incoming(filename);

@@ -88,16 +158,32 @@ void file_start_incoming_migration(const char *filespec, Error **errp)

    fioc = qio_channel_file_new_path(filename, O_RDONLY, 0, errp);
    if (!fioc) {
-        return;
+        goto out;
    }

-    ioc = QIO_CHANNEL(fioc);
+    if (migrate_multifd()) {
+        channels += migrate_multifd_channels();
+    }
+
+    fd = fioc->fd;
+
+    do {
+        QIOChannel *ioc = QIO_CHANNEL(fioc);
+
        if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) {
            return;
        }
-    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-file-incoming");
+
+        qio_channel_set_name(ioc, "migration-file-incoming");
        qio_channel_add_watch_full(ioc, G_IO_IN,
                                   file_accept_incoming_migration,
                                   NULL, NULL,
                                   g_main_context_get_thread_default());
+    } while (++i < channels && (fioc = qio_channel_file_new_fd(fd)));
+
+out:
+    if (!fioc) {
+        error_report("Error creating migration incoming channel");
+        return;
+    }
 }
--- a/migration/file.h
+++ b/migration/file.h
@@ -7,8 +7,14 @@

 #ifndef QEMU_MIGRATION_FILE_H
 #define QEMU_MIGRATION_FILE_H
-void file_start_incoming_migration(const char *filename, Error **errp);

-void file_start_outgoing_migration(MigrationState *s, const char *filename,
+#include "io/task.h"
+#include "channel.h"
+
+void file_start_incoming_migration(const char *filespec, Error **errp);
+
+void file_start_outgoing_migration(MigrationState *s, const char *filespec,
                                   Error **errp);
+void file_send_channel_create(QIOTaskFunc f, void *data);
+int file_send_channel_destroy(QIOChannel *ioc);
 #endif
--- a/migration/global_state.c
+++ b/migration/global_state.c
@@ -37,9 +37,25 @@ static void global_state_do_store(RunState state)
              state_str, '\0');
 }

-void global_state_store(void)
+RunState global_state_store(void)
 {
-    global_state_do_store(runstate_get());
+    RunState r = runstate_get();
+
+    global_state_do_store(r);
+    return r;
+}
+
+/*
+ * Return the run state already recorded in global_state.runstate when
+ * it parses as a RunState; otherwise record the current one through
+ * global_state_store() and return that.
+ */
+RunState global_state_store_once(void)
+{
+    char *stored = (char *)global_state.runstate;
+    int parsed = qapi_enum_parse(&RunState_lookup, stored, -1, NULL);
+
+    if (parsed >= 0) {
+        return parsed;
+    }
+
+    return global_state_store();
+}

 void global_state_store_running(void)
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -387,6 +387,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
        monitor_printf(mon, "%s: %" PRIu64 " MB/s\n",
            MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT),
            params->vcpu_dirty_limit);
+
+        if (params->has_direct_io) {
+            monitor_printf(mon, "%s: %s\n",
+                           MigrationParameter_str(MIGRATION_PARAMETER_DIRECT_IO),
+                           params->direct_io ? "on" : "off");
+        }
    }

    qapi_free_MigrationParameters(params);
@@ -661,6 +667,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
        p->has_vcpu_dirty_limit = true;
        visit_type_size(v, param, &p->vcpu_dirty_limit, &err);
        break;
+    case MIGRATION_PARAMETER_DIRECT_IO:
+        p->has_direct_io = true;
+        visit_type_bool(v, param, &p->direct_io, &err);
+        break;
    default:
        assert(0);
    }
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -106,24 +106,60 @@ static bool migration_needs_multiple_sockets(void)
    return migrate_multifd() || migrate_postcopy_preempt();
 }

+/* A seekable channel is needed exactly when migrate_fixed_ram() is true. */
+static bool migration_needs_seekable_channel(void)
+{
+    return migrate_fixed_ram();
+}
+
 static bool uri_supports_multi_channels(const char *uri)
 {
    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
-           strstart(uri, "vsock:", NULL);
+           strstart(uri, "vsock:", NULL) || strstart(uri, "file:", NULL);
+}
+
+/* Only the "file:" URI scheme is treated as a seekable transport. */
+static bool uri_supports_seeking(const char *uri)
+{
+    return strstart(uri, "file:", NULL);
+}

 static bool
 migration_channels_and_uri_compatible(const char *uri, Error **errp)
 {
+    bool compatible = true;
+
+    if (migration_needs_seekable_channel() &&
+        !uri_supports_seeking(uri)) {
+        error_setg(errp, "Migration requires seekable transport (e.g. file)");
+        compatible = false;
+    }
+
    if (migration_needs_multiple_sockets() &&
        !uri_supports_multi_channels(uri)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
+        compatible = false;
+    }
+
+    return compatible;
+}
+
+static bool migration_should_pause(const char *uri)
+{
+    if (!migrate_auto_pause()) {
        return false;
    }

+    /*
+     * Return true for migration schemes that benefit from a nonlive
+     * migration.
+     */
+
+    if (strstart(uri, "file:", NULL)) {
        return true;
    }

+    return false;
+}
+
 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
 {
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
@@ -720,6 +756,8 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
        }

        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+    } else if (migrate_multifd() && migrate_fixed_ram()) {
+        default_channel = multifd_recv_first_channel();
    } else {
        default_channel = !mis->from_src_file;
    }
@@ -1708,6 +1746,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
        }
    }

+    if (migration_should_pause(uri)) {
+        global_state_store();
+        vm_stop_force_state(RUN_STATE_PAUSED);
+    }
+
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
@@ -2128,7 +2171,7 @@ static int postcopy_start(MigrationState *ms, Error **errp)
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
-    global_state_store();
+    global_state_store_once();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
@@ -2328,8 +2371,7 @@ static int migration_completion_precopy(MigrationState *s,
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);

-    s->vm_old_state = runstate_get();
-    global_state_store();
+    s->vm_old_state = global_state_store_once();

    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    trace_migration_completion_vm_stop(ret);
@@ -2676,7 +2718,7 @@ static MigThrError migration_detect_error(MigrationState *s)
    }
 }

-static void migration_calculate_complete(MigrationState *s)
+void migration_calculate_complete(MigrationState *s)
 {
    uint64_t bytes = migration_transferred_bytes(s->to_dst_file);
    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
@@ -2708,8 +2750,7 @@ static void update_iteration_initial_status(MigrationState *s)
    s->iteration_initial_pages = ram_get_total_transferred_pages();
 }

-static void migration_update_counters(MigrationState *s,
-                                      int64_t current_time)
+void migration_update_counters(MigrationState *s, int64_t current_time)
 {
    uint64_t transferred, transferred_pages, time_spent;
    uint64_t current_bytes; /* bytes transferred since the beginning */
@@ -2843,6 +2884,7 @@ static void migration_iteration_finish(MigrationState *s)
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
+        trace_migration_status((int)s->mbps / 8, (int)s->pages_per_second, s->total_time);
        break;
    case MIGRATION_STATUS_COLO:
        assert(migrate_colo());
@@ -3185,9 +3227,8 @@ static void *bg_migration_thread(void *opaque)
     * transition in vm_stop_force_state() we need to wakeup it up.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
-    s->vm_old_state = runstate_get();
+    s->vm_old_state = global_state_store_once();

-    global_state_store();
    /* Forcibly stop VM before saving state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -474,7 +474,9 @@ struct MigrationState {
 };

 void migrate_set_state(int *state, int old_state, int new_state);
-
+void migration_calculate_complete(MigrationState *s);
+void migration_update_counters(MigrationState *s,
+                               int64_t current_time);
 void migration_fd_process_incoming(QEMUFile *f, Error **errp);
 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
 void migration_incoming_process(void);
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -17,6 +17,7 @@
 #include "exec/ramblock.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "file.h"
 #include "ram.h"
 #include "migration.h"
 #include "migration-stats.h"
@@ -28,6 +29,7 @@
 #include "threadinfo.h"
 #include "options.h"
 #include "qemu/yank.h"
+#include "io/channel-file.h"
 #include "io/channel-socket.h"
 #include "yank_functions.h"

@@ -140,6 +142,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p)
 static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
 {
    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+    uint64_t read_base = 0;

    if (flags != MULTIFD_FLAG_NOCOMP) {
        error_setg(errp, "multifd %u: flags received %x flags expected %x",
@@ -150,7 +153,13 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
        p->iov[i].iov_base = p->host + p->normal[i];
        p->iov[i].iov_len = p->page_size;
    }
-    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
+
+    if (migrate_fixed_ram()) {
+        read_base = p->pages->block->pages_offset - (uint64_t) p->host;
+    }
+
+    return qio_channel_read_full_all(p->c, p->iov, p->normal_num, read_base,
+                                     p->read_flags, errp);
 }

 static MultiFDMethods multifd_nocomp_ops = {
@@ -258,6 +267,19 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
    g_free(pages);
 }

+/*
+ * Mark the first p->normal_num page offsets of the channel's current
+ * page batch in the RAMBlock's shadow bitmap. Does nothing when the
+ * batch has no block attached.
+ */
+static void multifd_set_file_bitmap(MultiFDSendParams *p)
+{
+    MultiFDPages_t *batch = p->pages;
+    int idx;
+
+    if (batch->block) {
+        for (idx = 0; idx < p->normal_num; idx++) {
+            ramblock_set_shadow_bmap_atomic(batch->block, batch->offset[idx]);
+        }
+    }
+}
+
 static void multifd_send_fill_packet(MultiFDSendParams *p)
 {
    MultiFDPacket_t *packet = p->packet;
@@ -512,8 +534,12 @@ static void multifd_send_terminate_threads(Error *err)

 static int multifd_send_channel_destroy(QIOChannel *send)
 {
+    if (migrate_to_file()) {
+        return file_send_channel_destroy(send);
+    } else {
        return socket_send_channel_destroy(send);
    }
+}

 void multifd_save_cleanup(void)
 {
@@ -541,7 +567,7 @@ void multifd_save_cleanup(void)
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
-        qemu_sem_destroy(&p->sem_sync);
+        qemu_sem_destroy(&p->sem_done);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
@@ -585,19 +611,15 @@ static int multifd_zero_copy_flush(QIOChannel *c)
    return ret;
 }

-int multifd_send_sync_main(QEMUFile *f)
+static int multifd_send_wait(void)
 {
-    int i;
    bool flush_zero_copy;
+    int i;

-    if (!migrate_multifd()) {
-        return 0;
-    }
-    if (multifd_send_state->pages->num) {
-        if (multifd_send_pages(f) < 0) {
-            error_report("%s: multifd_send_pages fail", __func__);
-            return -1;
-        }
+    /* wait for all channels to be idle */
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        trace_multifd_send_wait(migrate_multifd_channels() - i);
+        qemu_sem_wait(&multifd_send_state->channels_ready);
    }

    /*
@@ -610,14 +632,60 @@ int multifd_send_sync_main(QEMUFile *f)
     * to be less frequent, e.g. only after we finished one whole scanning of
     * all the dirty bitmaps.
     */
-
    flush_zero_copy = migrate_zero_copy_send();

+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDSendParams *p = &multifd_send_state->params[i];
+
+        qemu_mutex_lock(&p->mutex);
+        assert(!p->pending_job);
+        qemu_mutex_unlock(&p->mutex);
+
+        qemu_sem_post(&p->sem);
+        qemu_sem_wait(&p->sem_done);
+
+        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
+            return -1;
+        }
+    }
+
+    /*
+     * All channels went idle and have no more jobs. Unless we send
+     * them more work, we're good to allow any cleanup code to run at
+     * this point.
+     */
+
+    return 0;
+}
+
+int multifd_send_sync_main(QEMUFile *f)
+{
+    int i, ret;
+
+    if (!migrate_multifd()) {
+        return 0;
+    }
+    if (multifd_send_state->pages->num) {
+        if (multifd_send_pages(f) < 0) {
+            error_report("%s: multifd_send_pages fail", __func__);
+            return -1;
+        }
+    }
+
+    if (!migrate_multifd_packets()) {
+        /*
+         * There's no sync packet to send. Just make sure the sending
+         * above has finished.
+         */
+        return multifd_send_wait();
+    }
+
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

+        qemu_sem_wait(&multifd_send_state->channels_ready);
        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
@@ -632,20 +700,11 @@ int multifd_send_sync_main(QEMUFile *f)
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDSendParams *p = &multifd_send_state->params[i];

-        qemu_sem_wait(&multifd_send_state->channels_ready);
-        trace_multifd_send_sync_main_wait(p->id);
-        qemu_sem_wait(&p->sem_sync);
-
-        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
-            return -1;
-        }
-    }
+    ret = multifd_send_wait();
    trace_multifd_send_sync_main(multifd_send_state->packet_num);

-    return 0;
+    return ret;
 }

 static void *multifd_send_thread(void *opaque)
@@ -655,18 +714,22 @@ static void *multifd_send_thread(void *opaque)
    Error *local_err = NULL;
    int ret = 0;
    bool use_zero_copy_send = migrate_zero_copy_send();
+    bool use_packets = migrate_multifd_packets();

    thread = migration_threads_add(p->name, qemu_get_thread_id());

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

+    if (use_packets) {
        if (multifd_send_initial_packet(p, &local_err) < 0) {
            ret = -1;
            goto out;
        }
+
        /* initial packet */
        p->num_packets = 1;
+    }

    while (true) {
        qemu_sem_post(&multifd_send_state->channels_ready);
@@ -678,11 +741,12 @@ static void *multifd_send_thread(void *opaque)
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
-            uint64_t packet_num = p->packet_num;
            uint32_t flags;
+            uint64_t write_base;
+
            p->normal_num = 0;

-            if (use_zero_copy_send) {
+            if (!use_packets || use_zero_copy_send) {
                p->iovs_num = 0;
            } else {
                p->iovs_num = 1;
@@ -700,16 +764,30 @@ static void *multifd_send_thread(void *opaque)
                    break;
                }
            }
+
+            if (use_packets) {
                multifd_send_fill_packet(p);
+                p->num_packets++;
+                write_base = 0;
+            } else {
+                multifd_set_file_bitmap(p);
+
+                /*
+                 * If we subtract the host page now, we don't need to
+                 * pass it into qio_channel_write_full_all() below.
+                 */
+                write_base = p->pages->block->pages_offset -
+                    (uint64_t)p->pages->block->host;
+            }
+
            flags = p->flags;
            p->flags = 0;
-            p->num_packets++;
            p->total_normal_pages += p->normal_num;
            p->pages->num = 0;
            p->pages->block = NULL;
            qemu_mutex_unlock(&p->mutex);

-            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
+            trace_multifd_send(p->id, p->packet_num, p->normal_num, flags,
                               p->next_packet_size);

            if (use_zero_copy_send) {
@@ -719,14 +797,15 @@ static void *multifd_send_thread(void *opaque)
                if (ret != 0) {
                    break;
                }
-            } else {
+            } else if (use_packets) {
                /* Send header using the same writev call */
                p->iov[0].iov_len = p->packet_len;
                p->iov[0].iov_base = p->packet;
            }

-            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
-                                              0, p->write_flags, &local_err);
+            ret = qio_channel_write_full_all(p->c, p->iov, p->iovs_num,
+                                             write_base, NULL, 0,
+                                             p->write_flags, &local_err);
            if (ret != 0) {
                break;
            }
@@ -740,15 +819,9 @@ static void *multifd_send_thread(void *opaque)
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

-            if (flags & MULTIFD_FLAG_SYNC) {
-                qemu_sem_post(&p->sem_sync);
-            }
-        } else if (p->quit) {
-            qemu_mutex_unlock(&p->mutex);
-            break;
        } else {
+            qemu_sem_post(&p->sem_done);
            qemu_mutex_unlock(&p->mutex);
-            /* sometimes there are spurious wakeups */
        }
    }

@@ -757,7 +830,7 @@ out:
        assert(local_err);
        trace_multifd_send_error(p->id);
        multifd_send_terminate_threads(local_err);
-        qemu_sem_post(&p->sem_sync);
+        qemu_sem_post(&p->sem_done);
        qemu_sem_post(&multifd_send_state->channels_ready);
        error_free(local_err);
    }
@@ -799,7 +872,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task,
     */
    p->quit = true;
    qemu_sem_post(&multifd_send_state->channels_ready);
-    qemu_sem_post(&p->sem_sync);
+    qemu_sem_post(&p->sem_done);
 }

 static void *multifd_tls_handshake_thread(void *opaque)
@@ -870,7 +943,7 @@ static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
     migrate_set_error(migrate_get_current(), err);
     /* Error happen, we need to tell who pay attention to me */
     qemu_sem_post(&multifd_send_state->channels_ready);
-     qemu_sem_post(&p->sem_sync);
+     qemu_sem_post(&p->sem_done);
     /*
      * Although multifd_send_thread is not created, but main migration
      * thread need to judge whether it is running, so we need to mark
@@ -903,13 +976,18 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)

 static void multifd_new_send_channel_create(gpointer opaque)
 {
+    /*
+     * Both helpers are given multifd_new_send_channel_async() as the
+     * callback; only the kind of channel created differs.
+     */
+    if (migrate_to_file()) {
+        file_send_channel_create(multifd_new_send_channel_async, opaque);
+    } else {
         socket_send_channel_create(multifd_new_send_channel_async, opaque);
     }
+}

 int multifd_save_setup(Error **errp)
 {
    int thread_count;
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
+    bool use_packets = migrate_multifd_packets();
    uint8_t i;

    if (!migrate_multifd()) {
@@ -929,25 +1007,33 @@ int multifd_save_setup(Error **errp)

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
-        qemu_sem_init(&p->sem_sync, 0);
+        qemu_sem_init(&p->sem_done, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
+
+        if (use_packets) {
            p->packet_len = sizeof(MultiFDPacket_t)
                          + sizeof(uint64_t) * page_count;
            p->packet = g_malloc0(p->packet_len);
            p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
            p->packet->version = cpu_to_be32(MULTIFD_VERSION);
-        p->name = g_strdup_printf("multifdsend_%d", i);
+
            /* We need one extra place for the packet header */
            p->iov = g_new0(struct iovec, page_count + 1);
+        } else {
+            p->iov = g_new0(struct iovec, page_count);
+        }
+        p->name = g_strdup_printf("multifdsend_%d", i);
        p->normal = g_new0(ram_addr_t, page_count);
        p->page_size = qemu_target_page_size();
        p->page_count = page_count;

        if (migrate_zero_copy_send()) {
            p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
+        } else if (!use_packets) {
+            p->write_flags |= QIO_CHANNEL_WRITE_FLAG_WITH_OFFSET;
        } else {
            p->write_flags = 0;
        }
@@ -971,6 +1057,8 @@ int multifd_save_setup(Error **errp)

 struct {
    MultiFDRecvParams *params;
+    /* array of pages to receive */
+    MultiFDPages_t *pages;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
@@ -981,6 +1069,75 @@ struct {
    MultiFDMethods *ops;
 } *multifd_recv_state;

+/*
+ * Hand the batch of pages accumulated in multifd_recv_state->pages to
+ * a receive channel that has no pending job.
+ *
+ * Channels are scanned round-robin starting at next_recv_channel. The
+ * loop does not sleep: it keeps cycling until it finds an idle channel
+ * or a channel that has quit. The chosen channel's own pages array is
+ * swapped into multifd_recv_state->pages and the channel is woken up
+ * through p->sem.
+ *
+ * @f is not used by this function.
+ *
+ * Returns 1 on success, -1 if a channel that has already quit is found.
+ */
+static int multifd_recv_pages(QEMUFile *f)
+{
+    int i;
+    static int next_recv_channel;
+    MultiFDRecvParams *p = NULL;
+    MultiFDPages_t *pages = multifd_recv_state->pages;
+
+    /*
+     * next_recv_channel can remain from a previous migration that was
+     * using more channels, so ensure it doesn't overflow if the
+     * limit is lower now.
+     */
+    next_recv_channel %= migrate_multifd_channels();
+    for (i = next_recv_channel;; i = (i + 1) % migrate_multifd_channels()) {
+        p = &multifd_recv_state->params[i];
+
+        qemu_mutex_lock(&p->mutex);
+        if (p->quit) {
+            error_report("%s: channel %d has already quit!", __func__, i);
+            qemu_mutex_unlock(&p->mutex);
+            return -1;
+        }
+        if (!p->pending_job) {
+            p->pending_job++;
+            next_recv_channel = (i + 1) % migrate_multifd_channels();
+            /* leave the loop with p->mutex still held */
+            break;
+        }
+        qemu_mutex_unlock(&p->mutex);
+    }
+
+    /* swap the filled batch with the channel's pages array */
+    multifd_recv_state->pages = p->pages;
+    p->pages = pages;
+    qemu_mutex_unlock(&p->mutex);
+    qemu_sem_post(&p->sem);
+
+    return 1;
+}
+
+/*
+ * Queue the page at @offset of @block in the current receive batch.
+ *
+ * The batch only holds pages of a single RAMBlock. It is handed over
+ * with multifd_recv_pages() when it becomes full or when @block
+ * differs from the block already in the batch; in the latter case the
+ * page is queued again afterwards.
+ *
+ * Returns 1 on success, -1 if multifd_recv_pages() failed.
+ */
+int multifd_recv_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+{
+    MultiFDPages_t *batch = multifd_recv_state->pages;
+    bool block_switched;
+
+    if (!batch->block) {
+        batch->block = block;
+    }
+
+    block_switched = batch->block != block;
+    if (!block_switched) {
+        batch->offset[batch->num] = offset;
+        batch->num++;
+
+        /* still room in the batch, nothing to hand over yet */
+        if (batch->num < batch->allocated) {
+            return 1;
+        }
+    }
+
+    if (multifd_recv_pages(f) < 0) {
+        return -1;
+    }
+
+    return block_switched ? multifd_recv_queue_page(f, block, offset) : 1;
+}
+
 static void multifd_recv_terminate_threads(Error *err)
 {
    int i;
@@ -1002,6 +1159,7 @@ static void multifd_recv_terminate_threads(Error *err)

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
+        qemu_sem_post(&p->sem);
        /*
         * We could arrive here for two reasons:
         *  - normal quit, i.e. everything went fine, just finished
@@ -1038,7 +1196,7 @@ void multifd_load_cleanup(void)
             * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
             * however try to wakeup it without harm in cleanup phase.
             */
-            qemu_sem_post(&p->sem_sync);
+            qemu_sem_post(&p->sem_done);
        }

        qemu_thread_join(&p->thread);
@@ -1050,9 +1208,12 @@ void multifd_load_cleanup(void)
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
-        qemu_sem_destroy(&p->sem_sync);
+        qemu_sem_destroy(&p->sem);
+        qemu_sem_destroy(&p->sem_done);
        g_free(p->name);
        p->name = NULL;
+        multifd_pages_clear(p->pages);
+        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
@@ -1065,6 +1226,8 @@ void multifd_load_cleanup(void)
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
+    multifd_pages_clear(multifd_recv_state->pages);
+    multifd_recv_state->pages = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;
 }
@@ -1073,9 +1236,10 @@ void multifd_recv_sync_main(void)
 {
    int i;

-    if (!migrate_multifd()) {
+    if (!migrate_multifd() || !migrate_multifd_packets()) {
        return;
    }
+
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

@@ -1091,7 +1255,7 @@ void multifd_recv_sync_main(void)
            }
        }
        trace_multifd_recv_sync_main_signal(p->id);
-        qemu_sem_post(&p->sem_sync);
+        qemu_sem_post(&p->sem_done);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
 }
@@ -1100,6 +1264,7 @@ static void *multifd_recv_thread(void *opaque)
 {
    MultiFDRecvParams *p = opaque;
    Error *local_err = NULL;
+    bool use_packets = migrate_multifd_packets();
    int ret;

    trace_multifd_recv_thread_start(p->id);
@@ -1107,11 +1272,13 @@ static void *multifd_recv_thread(void *opaque)

    while (true) {
        uint32_t flags;
+        p->normal_num = 0;

        if (p->quit) {
            break;
        }

+        if (use_packets) {
            ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                           p->packet_len, &local_err);
            if (ret == 0 || ret == -1) {   /* 0: EOF  -1: Error */
@@ -1124,13 +1291,34 @@ static void *multifd_recv_thread(void *opaque)
                qemu_mutex_unlock(&p->mutex);
                break;
            }
+            p->num_packets++;
+        } else {
+            /*
+             * No packets, so we need to wait for the vmstate code to
+             * queue pages.
+             */
+            qemu_sem_wait(&p->sem);
+            qemu_mutex_lock(&p->mutex);
+            if (!p->pending_job) {
+                qemu_mutex_unlock(&p->mutex);
+                break;
+            }
+
+            for (int i = 0; i < p->pages->num; i++) {
+                p->normal[p->normal_num] = p->pages->offset[i];
+                p->normal_num++;
+            }
+
+            p->pages->num = 0;
+            p->host = p->pages->block->host;
+        }

        flags = p->flags;
        /* recv methods don't know how to handle the SYNC flag */
        p->flags &= ~MULTIFD_FLAG_SYNC;
        trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags,
                           p->next_packet_size);
-        p->num_packets++;
+
        p->total_normal_pages += p->normal_num;
        qemu_mutex_unlock(&p->mutex);

@@ -1143,7 +1331,14 @@ static void *multifd_recv_thread(void *opaque)

        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_recv_state->sem_sync);
-            qemu_sem_wait(&p->sem_sync);
+            qemu_sem_wait(&p->sem_done);
+        }
+
+        if (!use_packets) {
+            qemu_mutex_lock(&p->mutex);
+            p->pending_job--;
+            p->pages->block = NULL;
+            qemu_mutex_unlock(&p->mutex);
        }
    }

@@ -1165,6 +1360,7 @@ int multifd_load_setup(Error **errp)
 {
    int thread_count;
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
+    bool use_packets = migrate_multifd_packets();
    uint8_t i;

    /*
@@ -1178,6 +1374,7 @@ int multifd_load_setup(Error **errp)
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
+    multifd_recv_state->pages = multifd_pages_init(page_count);
    qatomic_set(&multifd_recv_state->count, 0);
    qemu_sem_init(&multifd_recv_state->sem_sync, 0);
    multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
@@ -1186,12 +1383,20 @@ int multifd_load_setup(Error **errp)
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
-        qemu_sem_init(&p->sem_sync, 0);
+        qemu_sem_init(&p->sem, 0);
+        qemu_sem_init(&p->sem_done, 0);
        p->quit = false;
+        p->pending_job = 0;
        p->id = i;
+        p->pages = multifd_pages_init(page_count);
+
+        if (use_packets) {
            p->packet_len = sizeof(MultiFDPacket_t)
                + sizeof(uint64_t) * page_count;
            p->packet = g_malloc0(p->packet_len);
+        } else {
+            p->read_flags |= QIO_CHANNEL_READ_FLAG_WITH_OFFSET;
+        }
        p->name = g_strdup_printf("multifdrecv_%d", i);
        p->iov = g_new0(struct iovec, page_count);
        p->normal = g_new0(ram_addr_t, page_count);
@@ -1213,6 +1418,11 @@ int multifd_load_setup(Error **errp)
    return 0;
 }

+/*
+ * Returns true while multifd_recv_state has not been allocated.
+ * NOTE(review): presumably used to detect the first incoming channel,
+ * as the name suggests -- confirm against the caller.
+ */
+bool multifd_recv_first_channel(void)
+{
+    return multifd_recv_state == NULL;
+}
+
 bool multifd_recv_all_channels_created(void)
 {
    int thread_count = migrate_multifd_channels();
@@ -1237,8 +1447,10 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
    MultiFDRecvParams *p;
    Error *local_err = NULL;
-    int id;
+    bool use_packets = migrate_multifd_packets();
+    int id, num_packets = 0;

+    if (use_packets) {
        id = multifd_recv_initial_packet(ioc, &local_err);
        if (id < 0) {
            multifd_recv_terminate_threads(local_err);
@@ -1250,6 +1462,12 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
        }
        trace_multifd_recv_new_channel(id);

+        /* initial packet */
+        num_packets = 1;
+    } else {
+        id = qatomic_read(&multifd_recv_state->count);
+    }
+
    p = &multifd_recv_state->params[id];
    if (p->c != NULL) {
        error_setg(&local_err, "multifd: received id '%d' already setup'",
@@ -1259,9 +1477,8 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
        return;
    }
    p->c = ioc;
+    p->num_packets = num_packets;
    object_ref(OBJECT(ioc));
-    /* initial packet */
-    p->num_packets = 1;

    p->running = true;
    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -18,11 +18,13 @@ void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
 void multifd_load_cleanup(void);
 void multifd_load_shutdown(void);
+bool multifd_recv_first_channel(void);
 bool multifd_recv_all_channels_created(void);
 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
 int multifd_send_sync_main(QEMUFile *f);
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
+int multifd_recv_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);

 /* Multifd Compression flags */
 #define MULTIFD_FLAG_SYNC (1 << 0)
@@ -90,8 +92,8 @@ typedef struct {

    /* sem where to wait for more work */
    QemuSemaphore sem;
-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
+    /* channel is done transmitting until more pages are queued */
+    QemuSemaphore sem_done;

    /* this mutex protects the following parameters */
    QemuMutex mutex;
@@ -152,9 +154,13 @@ typedef struct {
    uint32_t page_size;
    /* number of pages in a full packet */
    uint32_t page_count;
+    /* multifd flags for receiving ram */
+    int read_flags;

-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
+    /* sem where to wait for more work */
+    QemuSemaphore sem;
+    /* channel is done transmitting until more pages are queued */
+    QemuSemaphore sem_done;

    /* this mutex protects the following parameters */
    QemuMutex mutex;
@@ -166,6 +172,13 @@ typedef struct {
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
+    int pending_job;
+    /* array of pages to receive.
+     * The owner of 'pages' depends on the 'pending_job' value:
+     * pending_job == 0 -> migration_thread can use it.
+     * pending_job != 0 -> multifd_channel can use it.
+     */
+    MultiFDPages_t *pages;

    /* thread local variables. No locking required */

@@ -209,4 +222,3 @@ typedef struct {
 void multifd_register_ops(int method, MultiFDMethods *ops);

 #endif
-
--- a/migration/options.c
+++ b/migration/options.c
@@ -200,6 +200,9 @@ Property migration_properties[] = {
    DEFINE_PROP_MIG_CAP("x-switchover-ack",
                        MIGRATION_CAPABILITY_SWITCHOVER_ACK),
    DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
+    DEFINE_PROP_BOOL("x-auto-pause", MigrationState,
+                     capabilities[MIGRATION_CAPABILITY_AUTO_PAUSE], true),
+    DEFINE_PROP_MIG_CAP("x-fixed-ram", MIGRATION_CAPABILITY_FIXED_RAM),
    DEFINE_PROP_END_OF_LIST(),
 };

@@ -210,6 +213,13 @@ bool migrate_auto_converge(void)
    return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
 }

+/* Returns the current value of the auto-pause migration capability. */
+bool migrate_auto_pause(void)
+{
+    MigrationState *state = migrate_get_current();
+    bool enabled = state->capabilities[MIGRATION_CAPABILITY_AUTO_PAUSE];
+
+    return enabled;
+}
+
 bool migrate_background_snapshot(void)
 {
    MigrationState *s = migrate_get_current();
@@ -259,6 +269,13 @@ bool migrate_events(void)
    return s->capabilities[MIGRATION_CAPABILITY_EVENTS];
 }

+/* Returns the current value of the fixed-ram migration capability. */
+bool migrate_fixed_ram(void)
+{
+    MigrationState *state = migrate_get_current();
+    bool enabled = state->capabilities[MIGRATION_CAPABILITY_FIXED_RAM];
+
+    return enabled;
+}
+
 bool migrate_ignore_shared(void)
 {
    MigrationState *s = migrate_get_current();
@@ -373,6 +390,11 @@ bool migrate_multifd_flush_after_each_section(void)
    return s->multifd_flush_after_each_section;
 }

+/*
+ * Returns true when multifd should use packets, which is the case
+ * whenever the fixed-ram capability is off.
+ */
+bool migrate_multifd_packets(void)
+{
+    if (migrate_fixed_ram()) {
+        return false;
+    }
+
+    return true;
+}
+
 bool migrate_postcopy(void)
 {
    return migrate_postcopy_ram() || migrate_dirty_bitmaps();
@@ -392,6 +414,13 @@ bool migrate_tls(void)
    return s->parameters.tls_creds && *s->parameters.tls_creds;
 }

+/*
+ * Returns true when the outgoing migration stream is seekable.
+ *
+ * NOTE(review): s->to_dst_file is handed to qemu_file_is_seekable()
+ * unchecked, so this assumes the outgoing file is already set up --
+ * confirm for every caller.
+ */
+bool migrate_to_file(void)
+{
+    QEMUFile *out = migrate_get_current()->to_dst_file;
+
+    return qemu_file_is_seekable(out);
+}
+
 typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
@@ -618,6 +647,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
        }
    }

+    if (new_caps[MIGRATION_CAPABILITY_FIXED_RAM]) {
+        if (new_caps[MIGRATION_CAPABILITY_XBZRLE]) {
+            error_setg(errp,
+                       "Fixed-ram migration is incompatible with xbzrle");
+            return false;
+        }
+
+        if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) {
+            error_setg(errp,
+                       "Fixed-ram migration is incompatible with compression");
+            return false;
+        }
+
+        if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
+            error_setg(errp,
+                       "Fixed-ram migration is incompatible with postcopy ram");
+            return false;
+        }
+    }
+
    return true;
 }

@@ -768,6 +817,22 @@ int migrate_decompress_threads(void)
    return s->parameters.decompress_threads;
 }

+/*
+ * Returns true when direct I/O is to be used for the migration file:
+ * the fixed-ram capability must be on and the direct-io parameter must
+ * be both present and set.
+ */
+bool migrate_direct_io(void)
+{
+    MigrationState *s = migrate_get_current();
+    bool fixed_ram = s->capabilities[MIGRATION_CAPABILITY_FIXED_RAM];
+
+    /* For now O_DIRECT is only supported with fixed-ram */
+    return fixed_ram && s->parameters.has_direct_io &&
+           s->parameters.direct_io;
+}
+
 uint64_t migrate_downtime_limit(void)
 {
    MigrationState *s = migrate_get_current();
@@ -976,6 +1041,11 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
    params->has_vcpu_dirty_limit = true;
    params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit;

+    if (s->parameters.has_direct_io) {
+        params->has_direct_io = true;
+        params->direct_io = s->parameters.direct_io;
+    }
+
    return params;
 }

@@ -1010,6 +1080,7 @@ void migrate_params_init(MigrationParameters *params)
    params->has_announce_step = true;
    params->has_x_vcpu_dirty_limit_period = true;
    params->has_vcpu_dirty_limit = true;
+    params->has_direct_io = qemu_has_direct_io();
 }

 /*
@@ -1307,6 +1378,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
    if (params->has_vcpu_dirty_limit) {
        dest->vcpu_dirty_limit = params->vcpu_dirty_limit;
    }
+
+    if (params->has_direct_io) {
+        dest->direct_io = params->direct_io;
+    }
 }

 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1437,6 +1512,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
    if (params->has_vcpu_dirty_limit) {
        s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit;
    }
+
+    if (params->has_direct_io) {
+        s->parameters.direct_io = params->direct_io;
+    }
 }

 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
--- a/migration/options.h
+++ b/migration/options.h
@@ -24,6 +24,7 @@ extern Property migration_properties[];
 /* capabilities */

 bool migrate_auto_converge(void);
+bool migrate_auto_pause(void);
 bool migrate_background_snapshot(void);
 bool migrate_block(void);
 bool migrate_colo(void);
@@ -31,6 +32,7 @@ bool migrate_compress(void);
 bool migrate_dirty_bitmaps(void);
 bool migrate_dirty_limit(void);
 bool migrate_events(void);
+bool migrate_fixed_ram(void);
 bool migrate_ignore_shared(void);
 bool migrate_late_block_activate(void);
 bool migrate_multifd(void);
@@ -55,9 +57,11 @@ bool migrate_zero_copy_send(void);
 */

 bool migrate_multifd_flush_after_each_section(void);
+bool migrate_multifd_packets(void);
 bool migrate_postcopy(void);
 bool migrate_rdma(void);
 bool migrate_tls(void);
+bool migrate_to_file(void);

 /* capabilities helpers */

@@ -78,6 +82,7 @@ uint8_t migrate_cpu_throttle_increment(void);
 uint8_t migrate_cpu_throttle_initial(void);
 bool migrate_cpu_throttle_tailslow(void);
 int migrate_decompress_threads(void);
+bool migrate_direct_io(void);
 uint64_t migrate_downtime_limit(void);
 uint8_t migrate_max_cpu_throttle(void);
 uint64_t migrate_max_bandwidth(void);
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -33,6 +33,7 @@
 #include "options.h"
 #include "qapi/error.h"
 #include "rdma.h"
+#include "io/channel-file.h"

 #define IO_BUF_SIZE 32768
 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
@@ -258,6 +259,10 @@ static void qemu_iovec_release_ram(QEMUFile *f)
    memset(f->may_free, 0, sizeof(f->may_free));
 }

+/* Returns true if the channel behind @f has the SEEKABLE feature. */
+bool qemu_file_is_seekable(QEMUFile *f)
+{
+    QIOChannel *chan = f->ioc;
+
+    return qio_channel_has_feature(chan, QIO_CHANNEL_FEATURE_SEEKABLE);
+}

 /**
 * Flushes QEMUFile buffer
@@ -460,6 +465,81 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
    }
 }

+/*
+ * Write @buflen bytes from @buf to the channel at offset @pos.
+ *
+ * Nothing is done if @f already carries an error. Data buffered in @f
+ * is flushed before the positioned write is issued.
+ *
+ * Any failure is recorded on @f: a write that reports an error, a
+ * write that would block (-EAGAIN), or a write that transfers fewer
+ * than @buflen bytes (-EIO). total_transferred is only updated after
+ * a complete write.
+ */
+void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
+                        off_t pos)
+{
+    Error *err = NULL;
+    ssize_t ret;
+
+    if (f->last_error) {
+        return;
+    }
+
+    qemu_fflush(f);
+    ret = qio_channel_pwritev(f->ioc, (char *)buf, buflen, pos, &err);
+
+    if (err || ret < 0) {
+        /* QIO_CHANNEL_ERR_BLOCK comes back without an Error object */
+        int code = (!err && ret == QIO_CHANNEL_ERR_BLOCK) ? -EAGAIN : -EIO;
+
+        qemu_file_set_error_obj(f, code, err);
+        return;
+    }
+
+    /* a short write would leave a hole at a fixed offset: treat as error */
+    if ((size_t)ret != buflen) {
+        error_setg(&err, "Partial write of size %zd, expected %zu",
+                   ret, buflen);
+        qemu_file_set_error_obj(f, -EIO, err);
+        return;
+    }
+
+    f->total_transferred += buflen;
+}
+
+
+/*
+ * Read up to @buflen bytes from the channel at offset @pos into @buf.
+ *
+ * Note that @buf is written to even though the prototype declares it
+ * const.
+ *
+ * Returns the number of bytes read, which may be less than @buflen.
+ * Returns 0 if @f already carries an error or if the read fails; a
+ * failed read is recorded on @f (-EAGAIN if the channel would block,
+ * -EIO otherwise).
+ */
+size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
+                          off_t pos)
+{
+    Error *err = NULL;
+    ssize_t ret;
+
+    if (f->last_error) {
+        return 0;
+    }
+
+    ret = qio_channel_preadv(f->ioc, (char *)buf, buflen, pos, &err);
+
+    /*
+     * Every negative value is a failure, not only -1: returning e.g.
+     * QIO_CHANNEL_ERR_BLOCK cast to size_t would look like a huge read.
+     */
+    if (err || ret < 0) {
+        int code = (!err && ret == QIO_CHANNEL_ERR_BLOCK) ? -EAGAIN : -EIO;
+
+        qemu_file_set_error_obj(f, code, err);
+        return 0;
+    }
+
+    return (size_t)ret;
+}
+
+/*
+ * Seek the channel behind @f to @off, interpreted according to
+ * @whence. @f is flushed first; for a file that is not writable the
+ * buffer index and size are reset. A failed seek is recorded on @f
+ * as -EIO.
+ */
+void qemu_set_offset(QEMUFile *f, off_t off, int whence)
+{
+    Error *local_err = NULL;
+
+    qemu_fflush(f);
+
+    if (!qemu_file_is_writable(f)) {
+        f->buf_size = 0;
+        f->buf_index = 0;
+    }
+
+    if (qio_channel_io_seek(f->ioc, off, whence, &local_err) == (off_t)-1) {
+        qemu_file_set_error_obj(f, -EIO, local_err);
+    }
+}
+
+/*
+ * Flush @f and return the current offset of the channel behind it.
+ * If the seek fails, the error is recorded on @f as -EIO and
+ * (off_t)-1 is returned.
+ */
+off_t qemu_get_offset(QEMUFile *f)
+{
+    Error *local_err = NULL;
+    off_t cur;
+
+    qemu_fflush(f);
+
+    cur = qio_channel_io_seek(f->ioc, 0, SEEK_CUR, &local_err);
+    if (cur != (off_t)-1) {
+        return cur;
+    }
+
+    qemu_file_set_error_obj(f, -EIO, local_err);
+    return cur;
+}
+
+
 void qemu_put_byte(QEMUFile *f, int v)
 {
    if (f->last_error) {
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -93,6 +93,10 @@ QEMUFile *qemu_file_get_return_path(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
 void qemu_file_set_blocking(QEMUFile *f, bool block);
 int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
+void qemu_set_offset(QEMUFile *f, off_t off, int whence);
+off_t qemu_get_offset(QEMUFile *f);
+void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, off_t pos);
+size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, off_t pos);

 QIOChannel *qemu_file_get_ioc(QEMUFile *file);

--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1157,12 +1157,18 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
        return 0;
    }

+    stat64_add(&mig_stats.zero_pages, 1);
+
+    if (migrate_fixed_ram()) {
+        /* zero pages are not transferred with fixed-ram */
+        clear_bit_atomic(offset >> TARGET_PAGE_BITS, pss->block->shadow_bmap);
+        return 1;
+    }
+
    len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO);
    qemu_put_byte(file, 0);
    len += 1;
    ram_release_page(pss->block->idstr, offset);
-
-    stat64_add(&mig_stats.zero_pages, 1);
    ram_transferred_add(len);

    /*
@@ -1220,6 +1226,11 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block,
 {
    QEMUFile *file = pss->pss_channel;

+    if (migrate_fixed_ram()) {
+        qemu_put_buffer_at(file, buf, TARGET_PAGE_SIZE,
+                           block->pages_offset + offset);
+        set_bit(offset >> TARGET_PAGE_BITS, block->shadow_bmap);
+    } else {
        ram_transferred_add(save_page_header(pss, pss->pss_channel, block,
                                             offset | RAM_SAVE_FLAG_PAGE));
        if (async) {
@@ -1229,6 +1240,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block,
        } else {
            qemu_put_buffer(file, buf, TARGET_PAGE_SIZE);
        }
+    }
    ram_transferred_add(TARGET_PAGE_SIZE);
    stat64_add(&mig_stats.normal_pages, 1);
    return 1;
@@ -1374,15 +1386,13 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
-            if (migrate_multifd() &&
+            if (!migrate_fixed_ram() && migrate_multifd() &&
                !migrate_multifd_flush_after_each_section()) {
                QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
                int ret = multifd_send_sync_main(f);
                if (ret < 0) {
                    return ret;
                }
-                qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
-                qemu_fflush(f);
            }
            /*
             * If memory migration starts over, we will meet a dirtied page
@@ -2475,6 +2485,8 @@ static void ram_save_cleanup(void *opaque)
        block->clear_bmap = NULL;
        g_free(block->bmap);
        block->bmap = NULL;
+        g_free(block->shadow_bmap);
+        block->shadow_bmap = NULL;
    }

    xbzrle_cleanup();
@@ -2842,6 +2854,7 @@ static void ram_list_init_bitmaps(void)
             */
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
+            block->shadow_bmap = bitmap_new(block->used_length >> TARGET_PAGE_BITS);
            block->clear_bmap_shift = shift;
            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
        }
@@ -2979,6 +2992,70 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
    }
 }

+#define FIXED_RAM_HDR_VERSION 1
+/*
+ * Per-RAMBlock header of the fixed-ram stream format.
+ *
+ * The struct is packed and written to the stream as-is, with every
+ * field stored big-endian.
+ */
+struct FixedRamHeader {
+    /* format version; readers reject anything above FIXED_RAM_HDR_VERSION */
+    uint32_t version;
+    /* page size used by the writer (TARGET_PAGE_SIZE when saving) */
+    uint64_t page_size;
+    /* file offset at which the block's page bitmap is stored */
+    uint64_t bitmap_offset;
+    /* file offset at which the block's pages start */
+    uint64_t pages_offset;
+    /* end of v1 */
+} QEMU_PACKED;
+
+/*
+ * Write the fixed-ram header for @block at the current position of @file.
+ *
+ * Also records in @block the file offsets where its bitmap and its
+ * pages will be stored, so later writes can target them directly.
+ */
+static void fixed_ram_insert_header(QEMUFile *file, RAMBlock *block)
+{
+    /* small and fixed-size: keep it on the stack, fully zero-initialized */
+    struct FixedRamHeader header = { 0 };
+    size_t header_size = sizeof(header);
+    long num_pages = block->used_length >> TARGET_PAGE_BITS;
+    size_t bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+    /*
+     * Save the file offsets of where the bitmap and the pages should
+     * go as they are written at the end of migration and during the
+     * iterative phase, respectively. The bitmap directly follows the
+     * header; the pages start at the next 1 MiB boundary after it.
+     */
+    block->bitmap_offset = qemu_get_offset(file) + header_size;
+    block->pages_offset = ROUND_UP(block->bitmap_offset +
+                                   bitmap_size, 0x100000);
+
+    /* migration stream is big-endian */
+    header.version = cpu_to_be32(FIXED_RAM_HDR_VERSION);
+    header.page_size = cpu_to_be64(TARGET_PAGE_SIZE);
+    header.bitmap_offset = cpu_to_be64(block->bitmap_offset);
+    header.pages_offset = cpu_to_be64(block->pages_offset);
+
+    qemu_put_buffer(file, (uint8_t *)&header, header_size);
+}
+
+/*
+ * Read a fixed-ram header from @file into @header and convert its fields
+ * from the big-endian stream format to host byte order.
+ *
+ * Returns 0 on success, -1 on a short read or when the header version is
+ * newer than FIXED_RAM_HDR_VERSION.
+ */
+static int fixed_ram_read_header(QEMUFile *file, struct FixedRamHeader *header)
+{
+    size_t ret, header_size = sizeof(struct FixedRamHeader);
+
+    ret = qemu_get_buffer(file, (uint8_t *)header, header_size);
+    if (ret != header_size) {
+        return -1;
+    }
+
+    /* migration stream is big-endian */
+    be32_to_cpus(&header->version);
+
+    if (header->version > FIXED_RAM_HDR_VERSION) {
+        /* version is a uint32_t, so it takes an unsigned conversion */
+        error_report("Migration fixed-ram capability version mismatch (expected %d, got %u)",
+                     FIXED_RAM_HDR_VERSION, header->version);
+        return -1;
+    }
+
+    be64_to_cpus(&header->page_size);
+    be64_to_cpus(&header->bitmap_offset);
+    be64_to_cpus(&header->pages_offset);
+
+    return 0;
+}
+
 /*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * long-running RCU critical section.  When rcu-reclaims in the code
@@ -3028,6 +3105,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
            if (migrate_ignore_shared()) {
                qemu_put_be64(f, block->mr->addr);
            }
+
+            if (migrate_fixed_ram()) {
+                fixed_ram_insert_header(f, block);
+                /* prepare offset for next ramblock */
+                qemu_set_offset(f, block->pages_offset + block->used_length, SEEK_SET);
+            }
        }
    }

@@ -3061,6 +3144,25 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    return 0;
 }

+/*
+ * Write the shadow bitmap of every migratable RAMBlock into @f at the
+ * block's bitmap_offset, then release the bitmap.
+ */
+static void ram_save_shadow_bmap(QEMUFile *f)
+{
+    RAMBlock *block;
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        long num_pages = block->used_length >> TARGET_PAGE_BITS;
+        long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+        /*
+         * NOTE(review): the bitmap is written exactly as laid out in host
+         * memory (array of unsigned long) -- confirm the reader is only
+         * expected to run on a host with the same word size/endianness.
+         */
+        qemu_put_buffer_at(f, (uint8_t *)block->shadow_bmap, bitmap_size,
+                           block->bitmap_offset);
+        /*
+         * Free the bitmap here: ram_save_cleanup() would only see the
+         * NULL left behind and the allocation would leak. The NULL is
+         * kept to catch any thread late sending pages.
+         */
+        g_free(block->shadow_bmap);
+        block->shadow_bmap = NULL;
+    }
+}
+
+/* Atomically mark the target page at @offset of @block in its shadow bitmap. */
+void ramblock_set_shadow_bmap_atomic(RAMBlock *block, ram_addr_t offset)
+{
+    unsigned long page = offset >> TARGET_PAGE_BITS;
+
+    set_bit_atomic(page, block->shadow_bmap);
+}
+
 /**
 * ram_save_iterate: iterative stage for migration
 *
@@ -3179,7 +3281,6 @@ out:
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
        ram_transferred_add(8);
-
        ret = qemu_file_get_error(f);
    }
    if (ret < 0) {
@@ -3256,7 +3357,13 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) {
        qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
    }
+
+    if (migrate_fixed_ram()) {
+        ram_save_shadow_bmap(f);
+    }
+
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
    qemu_fflush(f);

    return 0;
@@ -3854,6 +3961,73 @@ void colo_flush_ram_cache(void)
    trace_colo_flush_ram_cache_end();
 }

+/*
+ * Load into @block every page whose bit is set in @bitmap.
+ *
+ * Bit N of @bitmap stands for the target page at offset
+ * N << TARGET_PAGE_BITS inside the block; its contents live at
+ * block->pages_offset plus that offset in @f. With multifd enabled the
+ * pages are queued via multifd_recv_queue_page() instead of being read
+ * here.
+ *
+ * Returns 0 on success, -EINVAL if a page falls outside the block and
+ * -EIO if the file yields no data for a page.
+ */
+static int read_ramblock_fixed_ram(QEMUFile *f, RAMBlock *block,
+                                   long num_pages, unsigned long *bitmap)
+{
+    unsigned long set_bit_idx, clear_bit_idx;
+
+    for (set_bit_idx = find_first_bit(bitmap, num_pages);
+         set_bit_idx < num_pages;
+         set_bit_idx = find_next_bit(bitmap, num_pages, clear_bit_idx + 1)) {
+        ram_addr_t offset = (ram_addr_t)set_bit_idx << TARGET_PAGE_BITS;
+        size_t len, completed, read;
+
+        /* [set_bit_idx, clear_bit_idx) is one run of consecutive set bits */
+        clear_bit_idx = find_next_zero_bit(bitmap, num_pages, set_bit_idx + 1);
+        len = TARGET_PAGE_SIZE * (clear_bit_idx - set_bit_idx);
+
+        for (completed = 0; completed < len;
+             completed += read, offset += read) {
+            void *host = host_from_ram_block_offset(block, offset);
+            /* never ask for more than what is left of the run */
+            size_t read_len = MIN(len - completed, TARGET_PAGE_SIZE);
+
+            if (!host) {
+                error_report("Illegal RAM offset 0x%" PRIx64 " in block %s",
+                             (uint64_t)offset, block->idstr);
+                return -EINVAL;
+            }
+
+            if (migrate_multifd()) {
+                multifd_recv_queue_page(f, block, offset);
+                read = read_len;
+                continue;
+            }
+
+            read = qemu_get_buffer_at(f, host, read_len,
+                                      block->pages_offset + offset);
+            if (!read) {
+                /* a zero-length read would otherwise spin here forever */
+                error_report("Short read of page at offset 0x%" PRIx64
+                             " in block %s", (uint64_t)offset, block->idstr);
+                return -EIO;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Load one RAMBlock of @length bytes from a fixed-ram stream.
+ *
+ * Reads the block's fixed-ram header at the current position of @f,
+ * fetches the page bitmap from the offset recorded in the header, loads
+ * the pages flagged in it and finally moves the stream position past
+ * the block's pages area.
+ *
+ * Returns 0 on success and a negative errno value on failure.
+ */
+static int parse_ramblock_fixed_ram(QEMUFile *f, RAMBlock *block, ram_addr_t length)
+{
+    g_autofree unsigned long *bitmap = NULL;
+    struct FixedRamHeader header;
+    size_t bitmap_size;
+    long num_pages;
+    int ret;
+
+    if (fixed_ram_read_header(f, &header) < 0) {
+        error_report("Error reading fixed-ram header");
+        return -EINVAL;
+    }
+
+    /*
+     * The page size comes from the file. Bitmap bits are mapped to
+     * offsets in units of TARGET_PAGE_SIZE when loading, so any other
+     * value cannot be loaded; this also rejects 0 before it is used as
+     * a divisor below.
+     */
+    if (header.page_size != TARGET_PAGE_SIZE) {
+        error_report("Migration fixed-ram page size mismatch "
+                     "(expected %" PRIu64 ", got %" PRIu64 ")",
+                     (uint64_t)TARGET_PAGE_SIZE, (uint64_t)header.page_size);
+        return -EINVAL;
+    }
+
+    block->pages_offset = header.pages_offset;
+    num_pages = length / header.page_size;
+    bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+    bitmap = g_malloc0(bitmap_size);
+    if (qemu_get_buffer_at(f, (uint8_t *)bitmap, bitmap_size,
+                           header.bitmap_offset) != bitmap_size) {
+        error_report("Error parsing dirty bitmap");
+        return -EINVAL;
+    }
+
+    ret = read_ramblock_fixed_ram(f, block, num_pages, bitmap);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Skip pages array */
+    qemu_set_offset(f, block->pages_offset + length, SEEK_SET);
+
+    return 0;
+}
+
 static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)
 {
    int ret = 0;
@@ -3862,6 +4036,10 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)

    assert(block);

+    if (migrate_fixed_ram()) {
+        return parse_ramblock_fixed_ram(f, block, length);
+    }
+
    if (!qemu_ram_is_migratable(block)) {
        error_report("block %s should not be migrated !", block->idstr);
        return -EINVAL;
@@ -4063,6 +4241,7 @@ static int ram_load_precopy(QEMUFile *f)
                migrate_multifd_flush_after_each_section()) {
                multifd_recv_sync_main();
            }
+
            break;
        case RAM_SAVE_FLAG_HOOK:
            ret = rdma_registration_handle(f);
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -75,6 +75,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
 bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
 void postcopy_preempt_shutdown_file(MigrationState *s);
 void *postcopy_preempt_thread(void *opaque);
+void ramblock_set_shadow_bmap_atomic(RAMBlock *block, ram_addr_t offset);

 /* ram cache */
 int colo_init_ram_cache(void);
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -243,6 +243,7 @@ static bool should_validate_capability(int capability)
    /* Validate only new capabilities to keep compatibility. */
    switch (capability) {
    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
+    case MIGRATION_CAPABILITY_FIXED_RAM:
        return true;
    default:
        return false;
@@ -1676,6 +1677,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
    qemu_savevm_state_setup(f);

    while (qemu_file_get_error(f) == 0) {
+        migration_update_counters(ms, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        if (qemu_savevm_state_iterate(f, false) > 0) {
            break;
        }
@@ -1698,6 +1700,9 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
    }
    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);

+    migration_calculate_complete(ms);
+    trace_migration_status((int)ms->mbps / 8, (int)ms->pages_per_second, ms->total_time);
+
    /* f is outer parameter, it should not stay in global migration state after
     * this function finished */
    ms->to_dst_file = NULL;
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -138,7 +138,7 @@ multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, u
 multifd_send_error(uint8_t id) "channel %u"
 multifd_send_sync_main(long packet_num) "packet num %ld"
 multifd_send_sync_main_signal(uint8_t id) "channel %u"
-multifd_send_sync_main_wait(uint8_t id) "channel %u"
+multifd_send_wait(uint8_t n) "waiting for %u channels to finish sending"
 multifd_send_terminate_threads(bool error) "error %d"
 multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %"  PRIu64
 multifd_send_thread_start(uint8_t id) "%u"
@@ -170,6 +170,7 @@ migration_return_path_end_after(int rp_error) "%d"
 migration_thread_after_loop(void) ""
 migration_thread_file_err(void) ""
 migration_thread_setup_complete(void) ""
+migration_status(int mbps, int pages_per_second, int64_t total_time) "%d MB/s, %d pages/s, %" PRId64 " ms"
 open_return_path_on_source(void) ""
 open_return_path_on_source_continue(void) ""
 postcopy_start(void) ""
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -523,6 +523,13 @@
 #     and can result in more stable read performance.  Requires KVM
 #     with accelerator property "dirty-ring-size" set.  (Since 8.1)
 #
+# @auto-pause: If enabled, allows QEMU to decide whether to pause the
+#     VM before migration for an optimal migration performance.
+#     Enabled by default. (since 8.1)
+#
+# @fixed-ram: Migrate using fixed offsets for each RAM page. Requires
+#             a seekable transport such as a file.  (since 8.1)
+#
 # Features:
 #
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -539,7 +546,7 @@
           { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
           'validate-uuid', 'background-snapshot',
           'zero-copy-send', 'postcopy-preempt', 'switchover-ack',
-           'dirty-limit'] }
+           'dirty-limit', 'auto-pause', 'fixed-ram'] }

 ##
 # @MigrationCapabilityStatus:
@@ -833,6 +840,9 @@
 # @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
 #     Defaults to 1.  (Since 8.1)
 #
+# @direct-io: Open migration files with O_DIRECT when possible. Not
+#             all migration transports support this. (since 8.1)
+#
 # Features:
 #
 # @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
@@ -857,7 +867,7 @@
           'multifd-zlib-level', 'multifd-zstd-level',
           'block-bitmap-mapping',
           { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
-           'vcpu-dirty-limit'] }
+           'vcpu-dirty-limit', 'direct-io'] }

 ##
 # @MigrateSetParameters:
@@ -1009,6 +1019,9 @@
 # @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
 #     Defaults to 1.  (Since 8.1)
 #
+# @direct-io: Open migration files with O_DIRECT when possible. Not
+#             all migration transports support this. (since 8.1)
+#
 # Features:
 #
 # @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
@@ -1051,7 +1064,8 @@
            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
            '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
                                            'features': [ 'unstable' ] },
-            '*vcpu-dirty-limit': 'uint64'} }
+            '*vcpu-dirty-limit': 'uint64',
+            '*direct-io': 'bool' } }

 ##
 # @migrate-set-parameters:
@@ -1223,6 +1237,9 @@
 # @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
 #     Defaults to 1.  (Since 8.1)
 #
+# @direct-io: Open migration files with O_DIRECT when possible. Not
+#             all migration transports support this. (since 8.1)
+#
 # Features:
 #
 # @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
@@ -1262,7 +1279,8 @@
            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
            '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
                                            'features': [ 'unstable' ] },
-            '*vcpu-dirty-limit': 'uint64'} }
+            '*vcpu-dirty-limit': 'uint64',
+            '*direct-io': 'bool' } }

 ##
 # @query-migrate-parameters:
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -87,6 +87,7 @@ struct QTestState
    GList *pending_events;
    QTestQMPEventCallback eventCB;
    void *eventData;
+    QTestMigrationState *migration_state;
 };

 static GHookList abrt_hooks;
@@ -488,6 +489,8 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
        s->irq_level[i] = false;
    }

+    s->migration_state = g_new0(QTestMigrationState, 1);
+
    /*
     * Stopping QEMU for debugging is not supported on Windows.
     *
@@ -579,6 +582,7 @@ void qtest_quit(QTestState *s)
    close(s->fd);
    close(s->qmp_fd);
    g_string_free(s->rx, true);
+    g_free(s->migration_state);

    for (GList *it = s->pending_events; it != NULL; it = it->next) {
        qobject_unref((QDict *)it->data);
@@ -832,6 +836,11 @@ void qtest_qmp_set_event_callback(QTestState *s,
    s->eventData = opaque;
 }

+/*
+ * Install @cb as the QMP event callback of @s, passing the instance's
+ * own migration state to it as the opaque argument.
+ */
+void qtest_qmp_set_migration_callback(QTestState *s, QTestQMPEventCallback cb)
+{
+    QTestMigrationState *mig_state = s->migration_state;
+
+    qtest_qmp_set_event_callback(s, cb, mig_state);
+}
+
 QDict *qtest_qmp_event_ref(QTestState *s, const char *event)
 {
    while (s->pending_events) {
@@ -1838,3 +1847,8 @@ bool mkimg(const char *file, const char *fmt, unsigned size_mb)

    return ret && !err;
 }
+
+/* Accessor for the migration event state allocated together with @s. */
+QTestMigrationState *qtest_migration_state(QTestState *s)
+{
+    QTestMigrationState *state = s->migration_state;
+
+    return state;
+}
--- a/tests/qtest/libqtest.h
+++ b/tests/qtest/libqtest.h
@@ -23,6 +23,22 @@

 typedef struct QTestState QTestState;

+/* Migration-related QMP events observed so far on one QTestState. */
+typedef struct QTestMigrationState {
+    bool stop_seen;     /* a STOP event was seen */
+    bool resume_seen;   /* a RESUME event was seen */
+    bool setup_seen;    /* a MIGRATION event with status "setup" was seen */
+    bool active_seen;   /* a MIGRATION event with status "active" was seen */
+} QTestMigrationState;
+
+/**
+ * qtest_migration_state:
+ * @s: #QTestState instance to operate on.
+ *
+ * Returns: #QTestMigrationState instance.
+ */
+QTestMigrationState *qtest_migration_state(QTestState *s);
+
 /**
 * qtest_initf:
 * @fmt: Format for creating other arguments to pass to QEMU, formatted
@@ -275,6 +291,15 @@ typedef bool (*QTestQMPEventCallback)(QTestState *s, const char *name,
 void qtest_qmp_set_event_callback(QTestState *s,
                                  QTestQMPEventCallback cb, void *opaque);

+/**
+ * qtest_qmp_set_migration_callback:
+ * @s: #QTestState instance to operate on
+ * @cb: callback to invoke for events
+ *
+ * Like qtest_qmp_set_event_callback, but includes migration state events
+ */
+void qtest_qmp_set_migration_callback(QTestState *s, QTestQMPEventCallback cb);
+
 /**
 * qtest_qmp_eventwait:
 * @s: #QTestState instance to operate on.
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -23,26 +23,32 @@
 */
 #define MIGRATION_STATUS_WAIT_TIMEOUT 120

-bool migrate_watch_for_stop(QTestState *who, const char *name,
+bool migrate_watch_for_events(QTestState *who, const char *name,
                              QDict *event, void *opaque)
 {
-    bool *seen = opaque;
+    QTestMigrationState *state = opaque;

    if (g_str_equal(name, "STOP")) {
-        *seen = true;
+        state->stop_seen = true;
        return true;
-    }
+    } else if (g_str_equal(name, "RESUME")) {
+        state->resume_seen = true;
+        return true;
+    } else if (g_str_equal(name, "MIGRATION")) {
+        QDict *data;
+        g_assert(qdict_haskey(event, "data"));

+        data = qdict_get_qdict(event, "data");
+        g_assert(qdict_haskey(data, "status"));
+
+        if (g_str_equal(qdict_get_str(data, "status"), "setup")) {
+            state->setup_seen = true;
+        } else if (g_str_equal(qdict_get_str(data, "status"), "active")) {
+            state->active_seen = true;
+        } else {
            return false;
        }

-bool migrate_watch_for_resume(QTestState *who, const char *name,
-                              QDict *event, void *opaque)
-{
-    bool *seen = opaque;
-
-    if (g_str_equal(name, "RESUME")) {
-        *seen = true;
        return true;
    }

@@ -101,10 +107,67 @@ void migrate_set_capability(QTestState *who, const char *capability,
                             capability, value);
 }

+/* Block until @who has emitted STOP, unless one was already recorded. */
+void wait_for_stop(QTestState *who)
+{
+    if (qtest_migration_state(who)->stop_seen) {
+        return;
+    }
+
+    qtest_qmp_eventwait(who, "STOP");
+}
+
+/* Block until @who has emitted RESUME, unless one was already recorded. */
+void wait_for_resume(QTestState *who)
+{
+    if (qtest_migration_state(who)->resume_seen) {
+        return;
+    }
+
+    qtest_qmp_eventwait(who, "RESUME");
+}
+
+/*
+ * Consume MIGRATION events from @who until one reports @state as its
+ * status. Every event must carry a "data" dict with a "status" key.
+ */
+static void wait_for_migration_state(QTestState *who, const char *state)
+{
+    bool reached = false;
+
+    while (!reached) {
+        QDict *rsp = qtest_qmp_eventwait_ref(who, "MIGRATION");
+        QDict *data;
+
+        g_assert(qdict_haskey(rsp, "data"));
+        data = qdict_get_qdict(rsp, "data");
+        g_assert(qdict_haskey(data, "status"));
+
+        reached = g_str_equal(qdict_get_str(data, "status"), state);
+        qobject_unref(rsp);
+    }
+}
+
+/* Wait for @who to report migration status "setup", unless already recorded. */
+void wait_for_setup(QTestState *who)
+{
+    if (qtest_migration_state(who)->setup_seen) {
+        return;
+    }
+
+    wait_for_migration_state(who, "setup");
+}
+
+/* Wait for @who to report migration status "active", unless already recorded. */
+void wait_for_active(QTestState *who)
+{
+    if (qtest_migration_state(who)->active_seen) {
+        return;
+    }
+
+    wait_for_migration_state(who, "active");
+}
+
 void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
 {
    va_list ap;
-    QDict *args, *rsp, *data;
+    QDict *args, *rsp;

    va_start(ap, fmt);
    args = qdict_from_vjsonf_nofail(fmt, ap);
@@ -120,14 +183,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
    g_assert(qdict_haskey(rsp, "return"));
    qobject_unref(rsp);

-    rsp = qtest_qmp_eventwait_ref(to, "MIGRATION");
-    g_assert(qdict_haskey(rsp, "data"));
-
-    data = qdict_get_qdict(rsp, "data");
-    g_assert(qdict_haskey(data, "status"));
-    g_assert_cmpstr(qdict_get_str(data, "status"), ==, "setup");
-
-    qobject_unref(rsp);
+    wait_for_setup(to);
 }

 /*
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -15,11 +15,15 @@

 #include "libqtest.h"

-bool migrate_watch_for_stop(QTestState *who, const char *name,
-                            QDict *event, void *opaque);
-bool migrate_watch_for_resume(QTestState *who, const char *name,
+bool migrate_watch_for_events(QTestState *who, const char *name,
                              QDict *event, void *opaque);

+
+void wait_for_stop(QTestState *who);
+void wait_for_resume(QTestState *who);
+void wait_for_setup(QTestState *who);
+void wait_for_active(QTestState *who);
+
 G_GNUC_PRINTF(3, 4)
 void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);

--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -43,8 +43,6 @@
 unsigned start_address;
 unsigned end_address;
 static bool uffd_feature_thread_id;
-static bool got_src_stop;
-static bool got_dst_resume;

 /*
 * An initial 3 MB offset is used as that corresponds
@@ -275,21 +273,20 @@ static void read_blocktime(QTestState *who)
    qobject_unref(rsp_return);
 }

+/*
+ * Wait for two changes in the migration pass count, but bail if we stop.
+ */
 static void wait_for_migration_pass(QTestState *who)
 {
-    uint64_t initial_pass = get_migration_pass(who);
-    uint64_t pass;
+    uint64_t pass, prev_pass = 0, changes = 0;
+    QTestMigrationState *state = qtest_migration_state(who);

-    /* Wait for the 1st sync */
-    while (!got_src_stop && !initial_pass) {
-        usleep(1000);
-        initial_pass = get_migration_pass(who);
-    }
-
-    do {
+    while (changes < 2 && !state->stop_seen) {
        usleep(1000);
        pass = get_migration_pass(who);
-    } while (pass == initial_pass && !got_src_stop);
+        changes += (pass != prev_pass);
+        prev_pass = pass;
+    }
 }

 static void check_guests_ram(QTestState *who)
@@ -615,10 +612,7 @@ static void migrate_postcopy_start(QTestState *from, QTestState *to)
 {
    qtest_qmp_assert_success(from, "{ 'execute': 'migrate-start-postcopy' }");

-    if (!got_src_stop) {
-        qtest_qmp_eventwait(from, "STOP");
-    }
-
+    wait_for_stop(from);
    qtest_qmp_eventwait(to, "RESUME");
 }

@@ -751,8 +745,6 @@ static int test_migrate_start(QTestState **from, QTestState **to,
        }
    }

-    got_src_stop = false;
-    got_dst_resume = false;
    if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
        memory_size = "150M";
        arch_opts = g_strdup_printf("-drive file=%s,format=raw", bootpath);
@@ -823,9 +815,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
                                 ignore_stderr);
    if (!args->only_target) {
        *from = qtest_init(cmd_source);
-        qtest_qmp_set_event_callback(*from,
-                                     migrate_watch_for_stop,
-                                     &got_src_stop);
+        qtest_qmp_set_migration_callback(*from, migrate_watch_for_events);
    }

    cmd_target = g_strdup_printf("-accel kvm%s -accel tcg "
@@ -842,9 +832,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
                                 args->opts_target ? args->opts_target : "",
                                 ignore_stderr);
    *to = qtest_init(cmd_target);
-    qtest_qmp_set_event_callback(*to,
-                                 migrate_watch_for_resume,
-                                 &got_dst_resume);
+    qtest_qmp_set_migration_callback(*to, migrate_watch_for_events);

    /*
     * Remove shmem file immediately to avoid memory leak in test failed case.
@@ -1594,9 +1582,7 @@ static void test_precopy_common(MigrateCommon *args)
         */
        if (args->result == MIG_TEST_SUCCEED) {
            qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
-            if (!got_src_stop) {
-                qtest_qmp_eventwait(from, "STOP");
-            }
+            wait_for_stop(from);
            migrate_ensure_converge(from);
        }
    }
@@ -1642,9 +1628,8 @@ static void test_precopy_common(MigrateCommon *args)
             */
            wait_for_migration_complete(from);

-            if (!got_src_stop) {
-                qtest_qmp_eventwait(from, "STOP");
-            }
+            wait_for_stop(from);
+
        } else {
            wait_for_migration_complete(from);
            /*
@@ -1657,10 +1642,7 @@ static void test_precopy_common(MigrateCommon *args)
            qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
        }

-        if (!got_dst_resume) {
-            qtest_qmp_eventwait(to, "RESUME");
-        }
-
+        wait_for_resume(to);
        wait_for_serial("dest_serial");
    }

@@ -1672,7 +1654,7 @@ finish:
    test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
 }

-static void test_file_common(MigrateCommon *args, bool stop_src)
+static void test_file_common(MigrateCommon *args, bool stop_src, bool auto_pause)
 {
    QTestState *from, *to;
    void *data_hook = NULL;
@@ -1682,6 +1664,13 @@ static void test_file_common(MigrateCommon *args, bool stop_src)
        return;
    }

+    migrate_set_capability(from, "events", true);
+    migrate_set_capability(to, "events", true);
+
+    if (!auto_pause) {
+        migrate_set_capability(from, "auto-pause", false);
+    }
+
    /*
     * File migration is never live. We can keep the source VM running
     * during migration, but the destination will not be running
@@ -1698,17 +1687,31 @@ static void test_file_common(MigrateCommon *args, bool stop_src)

    if (stop_src) {
        qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
-        if (!got_src_stop) {
-            qtest_qmp_eventwait(from, "STOP");
-        }
+        wait_for_stop(from);
    }

    if (args->result == MIG_TEST_QMP_ERROR) {
        migrate_qmp_fail(from, connect_uri, "{}");
        goto finish;
    }
-
    migrate_qmp(from, connect_uri, "{}");
+
+    wait_for_setup(from);
+
+    /* auto-pause stops the VM right after setup */
+    if (auto_pause && !stop_src) {
+        wait_for_stop(from);
+    }
+
+    wait_for_active(from);
+
+    /*
+     * If the VM is not already stopped by the test or auto-pause,
+     * migration completion will stop it.
+     */
+    if (!stop_src && !auto_pause) {
+        wait_for_stop(from);
+    }
    wait_for_migration_complete(from);

    /*
@@ -1716,16 +1719,19 @@ static void test_file_common(MigrateCommon *args, bool stop_src)
     * destination.
     */
    migrate_incoming_qmp(to, connect_uri, "{}");
+    wait_for_active(to);
    wait_for_migration_complete(to);

-    if (stop_src) {
+    if (stop_src || auto_pause) {
+        /*
+         * The VM has been paused on source by either the test or
+         * auto-pause, re-start on destination to make sure it won't
+         * crash.
+         */
        qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
    }

-    if (!got_dst_resume) {
-        qtest_qmp_eventwait(to, "RESUME");
-    }
-
+    wait_for_resume(to);
    wait_for_serial("dest_serial");

 finish:
@@ -1843,9 +1849,7 @@ static void test_ignore_shared(void)

    migrate_wait_for_dirty_mem(from, to);

-    if (!got_src_stop) {
-        qtest_qmp_eventwait(from, "STOP");
-    }
+    wait_for_stop(from);

    qtest_qmp_eventwait(to, "RESUME");

@@ -1940,7 +1944,7 @@ static void test_precopy_file(void)
        .listen_uri = "defer",
    };

-    test_file_common(&args, true);
+    test_file_common(&args, true, true);
 }

 static void file_offset_finish_hook(QTestState *from, QTestState *to,
@@ -1984,7 +1988,7 @@ static void test_precopy_file_offset(void)
        .finish_hook = file_offset_finish_hook,
    };

-    test_file_common(&args, false);
+    test_file_common(&args, false, true);
 }

 static void test_precopy_file_offset_bad(void)
@@ -1998,7 +2002,103 @@ static void test_precopy_file_offset_bad(void)
        .result = MIG_TEST_QMP_ERROR,
    };

-    test_file_common(&args, false);
+    test_file_common(&args, false, false);
+}
+
+/* Start hook: enable the fixed-ram capability on source and destination. */
+static void *migrate_fixed_ram_start(QTestState *from, QTestState *to)
+{
+    const char *capability = "fixed-ram";
+
+    migrate_set_capability(from, capability, true);
+    migrate_set_capability(to, capability, true);
+
+    return NULL;
+}
+
+/*
+ * Fixed-ram migration to a file where the source is neither stopped by
+ * the test nor auto-paused.
+ */
+static void test_precopy_file_fixed_ram_live(void)
+{
+    g_autofree char *file_uri = g_strdup_printf("file:%s/%s", tmpfs,
+                                                FILE_TEST_FILENAME);
+    const bool stop_src = false, auto_pause = false;
+    MigrateCommon args = {
+        .start_hook = migrate_fixed_ram_start,
+        .listen_uri = "defer",
+        .connect_uri = file_uri,
+    };
+
+    test_file_common(&args, stop_src, auto_pause);
+}
+
+/*
+ * Fixed-ram migration to a file with auto-pause left enabled; the test
+ * itself does not stop the source.
+ */
+static void test_precopy_file_fixed_ram(void)
+{
+    g_autofree char *file_uri = g_strdup_printf("file:%s/%s", tmpfs,
+                                                FILE_TEST_FILENAME);
+    const bool stop_src = false, auto_pause = true;
+    MigrateCommon args = {
+        .start_hook = migrate_fixed_ram_start,
+        .listen_uri = "defer",
+        .connect_uri = file_uri,
+    };
+
+    test_file_common(&args, stop_src, auto_pause);
+}
+
+/*
+ * Start hook: fixed-ram plus multifd with four channels, configured on
+ * both source and destination.
+ */
+static void *migrate_multifd_fixed_ram_start(QTestState *from, QTestState *to)
+{
+    const int channels = 4;
+
+    migrate_fixed_ram_start(from, to);
+
+    migrate_set_parameter_int(from, "multifd-channels", channels);
+    migrate_set_parameter_int(to, "multifd-channels", channels);
+
+    migrate_set_capability(from, "multifd", true);
+    migrate_set_capability(to, "multifd", true);
+
+    return NULL;
+}
+
+/*
+ * Start hook: same setup as migrate_multifd_fixed_ram_start(), with the
+ * direct-io parameter additionally turned on for both sides.
+ */
+static void *migrate_multifd_fixed_ram_dio_start(QTestState *from, QTestState *to)
+{
+    const char *param = "direct-io";
+
+    migrate_multifd_fixed_ram_start(from, to);
+
+    migrate_set_parameter_bool(from, param, true);
+    migrate_set_parameter_bool(to, param, true);
+
+    return NULL;
+}
+
+/*
+ * Multifd + fixed-ram migration to a file where the source is neither
+ * stopped by the test nor auto-paused.
+ */
+static void test_multifd_file_fixed_ram_live(void)
+{
+    g_autofree char *file_uri = g_strdup_printf("file:%s/%s", tmpfs,
+                                                FILE_TEST_FILENAME);
+    const bool stop_src = false, auto_pause = false;
+    MigrateCommon args = {
+        .start_hook = migrate_multifd_fixed_ram_start,
+        .listen_uri = "defer",
+        .connect_uri = file_uri,
+    };
+
+    test_file_common(&args, stop_src, auto_pause);
+}
+
+/*
+ * Multifd + fixed-ram migration to a file with auto-pause left enabled;
+ * the test itself does not stop the source.
+ */
+static void test_multifd_file_fixed_ram(void)
+{
+    g_autofree char *file_uri = g_strdup_printf("file:%s/%s", tmpfs,
+                                                FILE_TEST_FILENAME);
+    const bool stop_src = false, auto_pause = true;
+    MigrateCommon args = {
+        .start_hook = migrate_multifd_fixed_ram_start,
+        .listen_uri = "defer",
+        .connect_uri = file_uri,
+    };
+
+    test_file_common(&args, stop_src, auto_pause);
+}
+
+/*
+ * Multifd + fixed-ram migration to a file with the direct-io parameter
+ * enabled and auto-pause left on.
+ *
+ * NOTE(review): presumably needs the filesystem behind tmpfs to accept
+ * O_DIRECT opens -- confirm, nothing here probes for it.
+ */
+static void test_multifd_file_fixed_ram_dio(void)
+{
+    g_autofree char *file_uri = g_strdup_printf("file:%s/%s", tmpfs,
+                                                FILE_TEST_FILENAME);
+    const bool stop_src = false, auto_pause = true;
+    MigrateCommon args = {
+        .start_hook = migrate_multifd_fixed_ram_dio_start,
+        .listen_uri = "defer",
+        .connect_uri = file_uri,
+    };
+
+    test_file_common(&args, stop_src, auto_pause);
 }

 static void test_precopy_tcp_plain(void)
@@ -2355,7 +2455,7 @@ static void test_migrate_auto_converge(void)
            break;
        }
        usleep(20);
-        g_assert_false(got_src_stop);
+        g_assert_false(qtest_migration_state(from)->stop_seen);
    } while (true);
    /* The first percentage of throttling should be at least init_pct */
    g_assert_cmpint(percentage, >=, init_pct);
@@ -2694,9 +2794,7 @@ static void test_multifd_tcp_cancel(void)

    migrate_ensure_converge(from);

-    if (!got_src_stop) {
-        qtest_qmp_eventwait(from, "STOP");
-    }
+    wait_for_stop(from);
    qtest_qmp_eventwait(to2, "RESUME");

    wait_for_serial("dest_serial");
@@ -3056,6 +3154,19 @@ int main(int argc, char **argv)
    qtest_add_func("/migration/precopy/file/offset/bad",
                   test_precopy_file_offset_bad);

+    qtest_add_func("/migration/precopy/file/fixed-ram",
+                   test_precopy_file_fixed_ram);
+    qtest_add_func("/migration/precopy/file/fixed-ram/live",
+                   test_precopy_file_fixed_ram_live);
+
+    qtest_add_func("/migration/multifd/file/fixed-ram",
+                   test_multifd_file_fixed_ram);
+    qtest_add_func("/migration/multifd/file/fixed-ram/live",
+                   test_multifd_file_fixed_ram_live);
+
+    qtest_add_func("/migration/multifd/file/fixed-ram/dio",
+                   test_multifd_file_fixed_ram_dio);
+
 #ifdef CONFIG_GNUTLS
    qtest_add_func("/migration/precopy/unix/tls/psk",
                   test_precopy_unix_tls_psk);
@@ -3128,14 +3239,8 @@ int main(int argc, char **argv)
    }
    qtest_add_func("/migration/multifd/tcp/plain/none",
                   test_multifd_tcp_none);
-    /*
-     * This test is flaky and sometimes fails in CI and otherwise:
-     * don't run unless user opts in via environment variable.
-     */
-    if (getenv("QEMU_TEST_FLAKY_TESTS")) {
    qtest_add_func("/migration/multifd/tcp/plain/cancel",
                   test_multifd_tcp_cancel);
-    }
    qtest_add_func("/migration/multifd/tcp/plain/zlib",
                   test_multifd_tcp_zlib);
 #ifdef CONFIG_ZSTD
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -277,6 +277,15 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
 }
 #endif

+/* Report whether O_DIRECT is defined for this build (compile-time check). */
+bool qemu_has_direct_io(void)
+{
+#ifndef O_DIRECT
+    return false;
+#else
+    return true;
+#endif
+}
+
 static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
 {
    int ret;