| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Postcopy migration for RAM | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Authors: | 
					
						
							|  |  |  |  *  Dave Gilbert  <dgilbert@redhat.com> | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This work is licensed under the terms of the GNU GPL, version 2 or later. | 
					
						
							|  |  |  |  * See the COPYING file in the top-level directory. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Postcopy is a migration technique where the execution flips from the | 
					
						
							|  |  |  |  * source to the destination before all the data has been copied. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-26 18:16:54 +00:00
										 |  |  | #include "qemu/osdep.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-24 20:50:19 +02:00
										 |  |  | #include "exec/target_page.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-24 20:07:27 +02:00
										 |  |  | #include "migration.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-20 18:52:18 +02:00
										 |  |  | #include "qemu-file.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-20 14:48:46 +02:00
										 |  |  | #include "savevm.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-20 13:12:24 +02:00
										 |  |  | #include "postcopy-ram.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-17 20:26:27 +02:00
										 |  |  | #include "ram.h"
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | #include "sysemu/sysemu.h"
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:23 +00:00
										 |  |  | #include "sysemu/balloon.h"
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | #include "qemu/error-report.h"
 | 
					
						
							|  |  |  | #include "trace.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:02 +00:00
										 /*
 * Upper bound on the number of ranges carried by one discard command.
 * The value is arbitrary; it keeps each command at around ~200 bytes.
 */
#define MAX_DISCARDS_PER_COMMAND 12

/*
 * Bookkeeping for discard ranges that are being batched up, holding at
 * most MAX_DISCARDS_PER_COMMAND (start, length) pairs at a time.
 */
struct PostcopyDiscardState {
    /* NOTE(review): presumably the RAMBlock the ranges belong to - confirm */
    const char *ramblock_name;
    /* NOTE(review): presumably the next free slot in the lists - confirm */
    uint16_t cur_entry;
    /* Start of each discard range (bytes) */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    /* Length of each discard range (bytes) */
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    /* Counters; NOTE(review): updated outside this section - confirm use */
    unsigned int nsentwords;
    unsigned int nsentcmds;
};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | /* Postcopy needs to detect accesses to pages that haven't yet been copied
 | 
					
						
							|  |  |  |  * across, and efficiently map new pages in, the techniques for doing this | 
					
						
							|  |  |  |  * are target OS specific. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | #if defined(__linux__)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  | #include <poll.h>
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | #include <sys/ioctl.h>
 | 
					
						
							|  |  |  | #include <sys/syscall.h>
 | 
					
						
							|  |  |  | #include <asm/types.h> /* for __u64 */
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-23 16:09:15 +00:00
										 |  |  | #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
 | 
					
						
							|  |  |  | #include <sys/eventfd.h>
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | #include <linux/userfaultfd.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Perform the UFFDIO_API handshake on @ufd and check what the kernel
 * reports back.
 *
 * Returns true when:
 *   - the UFFDIO_API ioctl succeeds,
 *   - both the REGISTER and UNREGISTER ioctls are advertised, and
 *   - if ram_pagesize_summary() differs from the host page size, the
 *     returned feature bits include UFFD_FEATURE_MISSING_HUGETLBFS.
 * Otherwise an error is reported and false is returned.
 */
static bool ufd_version_check(int ufd)
{
    const uint64_t required_ioctls = ((__u64)1 << _UFFDIO_REGISTER) |
                                     ((__u64)1 << _UFFDIO_UNREGISTER);
    struct uffdio_api api = { .api = UFFD_API, .features = 0 };
    bool hugepage_ok = false;

    if (ioctl(ufd, UFFDIO_API, &api) != 0) {
        error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
                     strerror(errno));
        return false;
    }

    if ((api.ioctls & required_ioctls) != required_ioctls) {
        /* Print only the required bits that the kernel did not offer */
        error_report("Missing userfault features: %" PRIx64,
                     (uint64_t)(~api.ioctls & required_ioctls));
        return false;
    }

    if (getpagesize() == ram_pagesize_summary()) {
        /* Only normal sized pages in use, nothing more to check */
        return true;
    }

    /* We've got a huge page */
#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
    hugepage_ok = api.features & UFFD_FEATURE_MISSING_HUGETLBFS;
#endif
    if (!hugepage_ok) {
        error_report("Userfault on this host does not support huge pages");
        return false;
    }

    return true;
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-03-07 18:36:37 +00:00
										 |  |  | /* Callback from postcopy_ram_supported_by_host block iterator.
 | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2017-05-17 17:58:10 +01:00
										 |  |  | static int test_ramblock_postcopiable(const char *block_name, void *host_addr, | 
					
						
							| 
									
										
										
										
											2017-03-07 18:36:37 +00:00
										 |  |  |                              ram_addr_t offset, ram_addr_t length, void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2017-05-17 17:58:10 +01:00
										 |  |  |     RAMBlock *rb = qemu_ram_block_by_name(block_name); | 
					
						
							|  |  |  |     size_t pagesize = qemu_ram_pagesize(rb); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (qemu_ram_is_shared(rb)) { | 
					
						
							| 
									
										
										
										
											2017-03-07 18:36:37 +00:00
										 |  |  |         error_report("Postcopy on shared RAM (%s) is not yet supported", | 
					
						
							|  |  |  |                      block_name); | 
					
						
							|  |  |  |         return 1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-05-17 17:58:10 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (length % pagesize) { | 
					
						
							|  |  |  |         error_report("Postcopy requires RAM blocks to be a page size multiple," | 
					
						
							|  |  |  |                      " block %s is 0x" RAM_ADDR_FMT " bytes with a " | 
					
						
							|  |  |  |                      "page size of 0x%zx", block_name, length, pagesize); | 
					
						
							|  |  |  |         return 1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-03-07 18:36:37 +00:00
										 |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:22 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Note: This has the side effect of munlock'ing all of RAM, that's | 
					
						
							|  |  |  |  * normally fine since if the postcopy succeeds it gets turned back on at the | 
					
						
							|  |  |  |  * end. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | bool postcopy_ram_supported_by_host(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     long pagesize = getpagesize(); | 
					
						
							|  |  |  |     int ufd = -1; | 
					
						
							|  |  |  |     bool ret = false; /* Error unless we change it */ | 
					
						
							|  |  |  |     void *testarea = NULL; | 
					
						
							|  |  |  |     struct uffdio_register reg_struct; | 
					
						
							|  |  |  |     struct uffdio_range range_struct; | 
					
						
							|  |  |  |     uint64_t feature_mask; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-03-21 09:09:14 +01:00
										 |  |  |     if (qemu_target_page_size() > pagesize) { | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  |         error_report("Target page size bigger than host page size"); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ufd = syscall(__NR_userfaultfd, O_CLOEXEC); | 
					
						
							|  |  |  |     if (ufd == -1) { | 
					
						
							|  |  |  |         error_report("%s: userfaultfd not available: %s", __func__, | 
					
						
							|  |  |  |                      strerror(errno)); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Version and features check */ | 
					
						
							|  |  |  |     if (!ufd_version_check(ufd)) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-03-07 18:36:37 +00:00
										 |  |  |     /* We don't support postcopy with shared RAM yet */ | 
					
						
							| 
									
										
										
										
											2017-05-17 17:58:10 +01:00
										 |  |  |     if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) { | 
					
						
							| 
									
										
										
										
											2017-03-07 18:36:37 +00:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:22 +00:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * userfault and mlock don't go together; we'll put it back later if | 
					
						
							|  |  |  |      * it was enabled. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (munlockall()) { | 
					
						
							|  |  |  |         error_report("%s: munlockall: %s", __func__,  strerror(errno)); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      *  We need to check that the ops we need are supported on anon memory | 
					
						
							|  |  |  |      *  To do that we need to register a chunk and see the flags that | 
					
						
							|  |  |  |      *  are returned. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | | 
					
						
							|  |  |  |                                     MAP_ANONYMOUS, -1, 0); | 
					
						
							|  |  |  |     if (testarea == MAP_FAILED) { | 
					
						
							|  |  |  |         error_report("%s: Failed to map test area: %s", __func__, | 
					
						
							|  |  |  |                      strerror(errno)); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     g_assert(((size_t)testarea & (pagesize-1)) == 0); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     reg_struct.range.start = (uintptr_t)testarea; | 
					
						
							|  |  |  |     reg_struct.range.len = pagesize; | 
					
						
							|  |  |  |     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { | 
					
						
							|  |  |  |         error_report("%s userfault register: %s", __func__, strerror(errno)); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     range_struct.start = (uintptr_t)testarea; | 
					
						
							|  |  |  |     range_struct.len = pagesize; | 
					
						
							|  |  |  |     if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { | 
					
						
							|  |  |  |         error_report("%s userfault unregister: %s", __func__, strerror(errno)); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     feature_mask = (__u64)1 << _UFFDIO_WAKE | | 
					
						
							|  |  |  |                    (__u64)1 << _UFFDIO_COPY | | 
					
						
							|  |  |  |                    (__u64)1 << _UFFDIO_ZEROPAGE; | 
					
						
							|  |  |  |     if ((reg_struct.ioctls & feature_mask) != feature_mask) { | 
					
						
							|  |  |  |         error_report("Missing userfault map features: %" PRIx64, | 
					
						
							|  |  |  |                      (uint64_t)(~reg_struct.ioctls & feature_mask)); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Success! */ | 
					
						
							|  |  |  |     ret = true; | 
					
						
							|  |  |  | out: | 
					
						
							|  |  |  |     if (testarea) { | 
					
						
							|  |  |  |         munmap(testarea, pagesize); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (ufd != -1) { | 
					
						
							|  |  |  |         close(ufd); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return ret; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Setup an area of RAM so that it *can* be used for postcopy later; this | 
					
						
							|  |  |  |  * must be done right at the start prior to pre-copy. | 
					
						
							|  |  |  |  * opaque should be the MIS. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static int init_range(const char *block_name, void *host_addr, | 
					
						
							|  |  |  |                       ram_addr_t offset, ram_addr_t length, void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     trace_postcopy_init_range(block_name, host_addr, offset, length); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * We need the whole of RAM to be truly empty for postcopy, so things | 
					
						
							|  |  |  |      * like ROMs and any data tables built during init must be zero'd | 
					
						
							|  |  |  |      * - we're going to get the copy from the source anyway. | 
					
						
							|  |  |  |      * (Precopy will just overwrite this data, so doesn't need the discard) | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2017-03-21 11:35:24 +01:00
										 |  |  |     if (ram_discard_range(block_name, 0, length)) { | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * At the end of migration, undo the effects of init_range | 
					
						
							|  |  |  |  * opaque should be the MIS. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static int cleanup_range(const char *block_name, void *host_addr, | 
					
						
							|  |  |  |                         ram_addr_t offset, ram_addr_t length, void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = opaque; | 
					
						
							|  |  |  |     struct uffdio_range range_struct; | 
					
						
							|  |  |  |     trace_postcopy_cleanup_range(block_name, host_addr, offset, length); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * We turned off hugepage for the precopy stage with postcopy enabled | 
					
						
							|  |  |  |      * we can turn it back on now. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2015-11-19 15:27:48 +00:00
										 |  |  |     qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * We can also turn off userfault now since we should have all the | 
					
						
							|  |  |  |      * pages.   It can be useful to leave it on to debug postcopy | 
					
						
							|  |  |  |      * if you're not sure it's always getting every page. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     range_struct.start = (uintptr_t)host_addr; | 
					
						
							|  |  |  |     range_struct.len = length; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { | 
					
						
							|  |  |  |         error_report("%s: userfault unregister %s", __func__, strerror(errno)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Initialise postcopy-ram, setting the RAM to a state where we can go into | 
					
						
							|  |  |  |  * postcopy later; must be called prior to any precopy. | 
					
						
							|  |  |  |  * called from arch_init's similarly named ram_postcopy_incoming_init | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2017-03-21 11:35:24 +01:00
										 |  |  |     if (qemu_ram_foreach_block(init_range, NULL)) { | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * At the end of a migration where postcopy_ram_incoming_init was called. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     trace_postcopy_ram_incoming_cleanup_entry(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (mis->have_fault_thread) { | 
					
						
							|  |  |  |         uint64_t tmp64; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (qemu_ram_foreach_block(cleanup_range, mis)) { | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         /*
 | 
					
						
							|  |  |  |          * Tell the fault_thread to exit, it's an eventfd that should | 
					
						
							|  |  |  |          * currently be at 0, we're going to increment it to 1 | 
					
						
							|  |  |  |          */ | 
					
						
							|  |  |  |         tmp64 = 1; | 
					
						
							|  |  |  |         if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) { | 
					
						
							|  |  |  |             trace_postcopy_ram_incoming_cleanup_join(); | 
					
						
							|  |  |  |             qemu_thread_join(&mis->fault_thread); | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             /* Not much we can do here, but may as well report it */ | 
					
						
							|  |  |  |             error_report("%s: incrementing userfault_quit_fd: %s", __func__, | 
					
						
							|  |  |  |                          strerror(errno)); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         trace_postcopy_ram_incoming_cleanup_closeuf(); | 
					
						
							|  |  |  |         close(mis->userfault_fd); | 
					
						
							|  |  |  |         close(mis->userfault_quit_fd); | 
					
						
							|  |  |  |         mis->have_fault_thread = false; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:23 +00:00
										 |  |  |     qemu_balloon_inhibit(false); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:22 +00:00
										 |  |  |     if (enable_mlock) { | 
					
						
							|  |  |  |         if (os_mlock() < 0) { | 
					
						
							|  |  |  |             error_report("mlock: %s", strerror(errno)); | 
					
						
							|  |  |  |             /*
 | 
					
						
							|  |  |  |              * It doesn't feel right to fail at this point, we have a valid | 
					
						
							|  |  |  |              * VM state. | 
					
						
							|  |  |  |              */ | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     postcopy_state_set(POSTCOPY_INCOMING_END); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  |     if (mis->postcopy_tmp_page) { | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |         munmap(mis->postcopy_tmp_page, mis->largest_page_size); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  |         mis->postcopy_tmp_page = NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:36 +00:00
										 |  |  |     if (mis->postcopy_tmp_zero_page) { | 
					
						
							|  |  |  |         munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); | 
					
						
							|  |  |  |         mis->postcopy_tmp_zero_page = NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     trace_postcopy_ram_incoming_cleanup_exit(); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:20 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Disable huge pages on an area | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static int nhp_range(const char *block_name, void *host_addr, | 
					
						
							|  |  |  |                     ram_addr_t offset, ram_addr_t length, void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     trace_postcopy_nhp_range(block_name, host_addr, offset, length); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Before we do discards we need to ensure those discards really | 
					
						
							|  |  |  |      * do delete areas of the page, even if THP thinks a hugepage would | 
					
						
							|  |  |  |      * be a good idea, so force hugepages off. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2015-11-19 15:27:48 +00:00
										 |  |  |     qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:20 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard | 
					
						
							|  |  |  |  * however leaving it until after precopy means that most of the precopy | 
					
						
							|  |  |  |  * data is still THPd | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int postcopy_ram_prepare_discard(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (qemu_ram_foreach_block(nhp_range, mis)) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     postcopy_state_set(POSTCOPY_INCOMING_DISCARD); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Mark the given area of RAM as requiring notification to unwritten areas | 
					
						
							|  |  |  |  * Used as a  callback on qemu_ram_foreach_block. | 
					
						
							|  |  |  |  *   host_addr: Base of area to mark | 
					
						
							|  |  |  |  *   offset: Offset in the whole ram arena | 
					
						
							|  |  |  |  *   length: Length of the section | 
					
						
							|  |  |  |  *   opaque: MigrationIncomingState pointer | 
					
						
							|  |  |  |  * Returns 0 on success | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static int ram_block_enable_notify(const char *block_name, void *host_addr, | 
					
						
							|  |  |  |                                    ram_addr_t offset, ram_addr_t length, | 
					
						
							|  |  |  |                                    void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = opaque; | 
					
						
							|  |  |  |     struct uffdio_register reg_struct; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     reg_struct.range.start = (uintptr_t)host_addr; | 
					
						
							|  |  |  |     reg_struct.range.len = length; | 
					
						
							|  |  |  |     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Now tell our userfault_fd that it's responsible for this area */ | 
					
						
							|  |  |  |     if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, ®_struct)) { | 
					
						
							|  |  |  |         error_report("%s userfault register: %s", __func__, strerror(errno)); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:44 +00:00
										 |  |  |     if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { | 
					
						
							|  |  |  |         error_report("%s userfault: Region doesn't support COPY", __func__); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Handle faults detected by the USERFAULT markings | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static void *postcopy_ram_fault_thread(void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = opaque; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     struct uffd_msg msg; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  |     RAMBlock *rb = NULL; | 
					
						
							|  |  |  |     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     trace_postcopy_ram_fault_thread_entry(); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  |     qemu_sem_post(&mis->fault_thread_sem); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     while (true) { | 
					
						
							|  |  |  |         ram_addr_t rb_offset; | 
					
						
							|  |  |  |         struct pollfd pfd[2]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /*
 | 
					
						
							|  |  |  |          * We're mainly waiting for the kernel to give us a faulting HVA, | 
					
						
							|  |  |  |          * however we can be told to quit via userfault_quit_fd which is | 
					
						
							|  |  |  |          * an eventfd | 
					
						
							|  |  |  |          */ | 
					
						
							|  |  |  |         pfd[0].fd = mis->userfault_fd; | 
					
						
							|  |  |  |         pfd[0].events = POLLIN; | 
					
						
							|  |  |  |         pfd[0].revents = 0; | 
					
						
							|  |  |  |         pfd[1].fd = mis->userfault_quit_fd; | 
					
						
							|  |  |  |         pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ | 
					
						
							|  |  |  |         pfd[1].revents = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (poll(pfd, 2, -1 /* Wait forever */) == -1) { | 
					
						
							|  |  |  |             error_report("%s: userfault poll: %s", __func__, strerror(errno)); | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (pfd[1].revents) { | 
					
						
							|  |  |  |             trace_postcopy_ram_fault_thread_quit(); | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ret = read(mis->userfault_fd, &msg, sizeof(msg)); | 
					
						
							|  |  |  |         if (ret != sizeof(msg)) { | 
					
						
							|  |  |  |             if (errno == EAGAIN) { | 
					
						
							|  |  |  |                 /*
 | 
					
						
							|  |  |  |                  * if a wake up happens on the other thread just after | 
					
						
							|  |  |  |                  * the poll, there is nothing to read. | 
					
						
							|  |  |  |                  */ | 
					
						
							|  |  |  |                 continue; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             if (ret < 0) { | 
					
						
							|  |  |  |                 error_report("%s: Failed to read full userfault message: %s", | 
					
						
							|  |  |  |                              __func__, strerror(errno)); | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 error_report("%s: Read %d bytes from userfaultfd expected %zd", | 
					
						
							|  |  |  |                              __func__, ret, sizeof(msg)); | 
					
						
							|  |  |  |                 break; /* Lost alignment, don't know what we'd read next */ | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (msg.event != UFFD_EVENT_PAGEFAULT) { | 
					
						
							|  |  |  |             error_report("%s: Read unexpected event %ud from userfaultfd", | 
					
						
							|  |  |  |                          __func__, msg.event); | 
					
						
							|  |  |  |             continue; /* It's not a page fault, shouldn't happen */ | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         rb = qemu_ram_block_from_host( | 
					
						
							|  |  |  |                  (void *)(uintptr_t)msg.arg.pagefault.address, | 
					
						
							| 
									
										
										
										
											2016-05-26 10:07:50 +02:00
										 |  |  |                  true, &rb_offset); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |         if (!rb) { | 
					
						
							|  |  |  |             error_report("postcopy_ram_fault_thread: Fault outside guest: %" | 
					
						
							|  |  |  |                          PRIx64, (uint64_t)msg.arg.pagefault.address); | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:38 +00:00
										 |  |  |         rb_offset &= ~(qemu_ram_pagesize(rb) - 1); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, | 
					
						
							|  |  |  |                                                 qemu_ram_get_idstr(rb), | 
					
						
							|  |  |  |                                                 rb_offset); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /*
 | 
					
						
							|  |  |  |          * Send the request to the source - we want to request one | 
					
						
							|  |  |  |          * of our host page sizes (which is >= TPS) | 
					
						
							|  |  |  |          */ | 
					
						
							|  |  |  |         if (rb != last_rb) { | 
					
						
							|  |  |  |             last_rb = rb; | 
					
						
							|  |  |  |             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:38 +00:00
										 |  |  |                                      rb_offset, qemu_ram_pagesize(rb)); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |         } else { | 
					
						
							|  |  |  |             /* Save some space */ | 
					
						
							|  |  |  |             migrate_send_rp_req_pages(mis, NULL, | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:38 +00:00
										 |  |  |                                      rb_offset, qemu_ram_pagesize(rb)); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     trace_postcopy_ram_fault_thread_exit(); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int postcopy_ram_enable_notify(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     /* Open the fd for the kernel to give us userfaults */ | 
					
						
							|  |  |  |     mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); | 
					
						
							|  |  |  |     if (mis->userfault_fd == -1) { | 
					
						
							|  |  |  |         error_report("%s: Failed to open userfault fd: %s", __func__, | 
					
						
							|  |  |  |                      strerror(errno)); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Although the host check already tested the API, we need to | 
					
						
							|  |  |  |      * do the check again as an ABI handshake on the new fd. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (!ufd_version_check(mis->userfault_fd)) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Now an eventfd we use to tell the fault-thread to quit */ | 
					
						
							|  |  |  |     mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC); | 
					
						
							|  |  |  |     if (mis->userfault_quit_fd == -1) { | 
					
						
							|  |  |  |         error_report("%s: Opening userfault_quit_fd: %s", __func__, | 
					
						
							|  |  |  |                      strerror(errno)); | 
					
						
							|  |  |  |         close(mis->userfault_fd); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  |     qemu_sem_init(&mis->fault_thread_sem, 0); | 
					
						
							|  |  |  |     qemu_thread_create(&mis->fault_thread, "postcopy/fault", | 
					
						
							|  |  |  |                        postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE); | 
					
						
							|  |  |  |     qemu_sem_wait(&mis->fault_thread_sem); | 
					
						
							|  |  |  |     qemu_sem_destroy(&mis->fault_thread_sem); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     mis->have_fault_thread = true; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Mark so that we get notified of accesses to unwritten areas */ | 
					
						
							|  |  |  |     if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:23 +00:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Ballooning can mark pages as absent while we're postcopying | 
					
						
							|  |  |  |      * that would cause false userfaults. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     qemu_balloon_inhibit(true); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:17 +00:00
										 |  |  |     trace_postcopy_ram_enable_notify(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Place a host page (from) at (host) atomically | 
					
						
							|  |  |  |  * returns 0 on success | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  | int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, | 
					
						
							|  |  |  |                         size_t pagesize) | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     struct uffdio_copy copy_struct; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     copy_struct.dst = (uint64_t)(uintptr_t)host; | 
					
						
							|  |  |  |     copy_struct.src = (uint64_t)(uintptr_t)from; | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |     copy_struct.len = pagesize; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  |     copy_struct.mode = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* copy also acks to the kernel waking the stalled thread up
 | 
					
						
							|  |  |  |      * TODO: We can inhibit that ack and only do it if it was requested | 
					
						
							|  |  |  |      * which would be slightly cheaper, but we'd have to be careful | 
					
						
							|  |  |  |      * of the order of updating our page state. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { | 
					
						
							|  |  |  |         int e = errno; | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |         error_report("%s: %s copy host: %p from: %p (size: %zd)", | 
					
						
							|  |  |  |                      __func__, strerror(e), host, from, pagesize); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return -e; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     trace_postcopy_place_page(host); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Place a zero page at (host) atomically | 
					
						
							|  |  |  |  * returns 0 on success | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  | int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, | 
					
						
							|  |  |  |                              size_t pagesize) | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |     trace_postcopy_place_page_zero(host); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |     if (pagesize == getpagesize()) { | 
					
						
							|  |  |  |         struct uffdio_zeropage zero_struct; | 
					
						
							|  |  |  |         zero_struct.range.start = (uint64_t)(uintptr_t)host; | 
					
						
							|  |  |  |         zero_struct.range.len = getpagesize(); | 
					
						
							|  |  |  |         zero_struct.mode = 0; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |         if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { | 
					
						
							|  |  |  |             int e = errno; | 
					
						
							|  |  |  |             error_report("%s: %s zero host: %p", | 
					
						
							|  |  |  |                          __func__, strerror(e), host); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |             return -e; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } else { | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:36 +00:00
										 |  |  |         /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ | 
					
						
							|  |  |  |         if (!mis->postcopy_tmp_zero_page) { | 
					
						
							|  |  |  |             mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, | 
					
						
							|  |  |  |                                                PROT_READ | PROT_WRITE, | 
					
						
							|  |  |  |                                                MAP_PRIVATE | MAP_ANONYMOUS, | 
					
						
							|  |  |  |                                                -1, 0); | 
					
						
							|  |  |  |             if (mis->postcopy_tmp_zero_page == MAP_FAILED) { | 
					
						
							|  |  |  |                 int e = errno; | 
					
						
							|  |  |  |                 mis->postcopy_tmp_zero_page = NULL; | 
					
						
							|  |  |  |                 error_report("%s: %s mapping large zero page", | 
					
						
							|  |  |  |                              __func__, strerror(e)); | 
					
						
							|  |  |  |                 return -e; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, | 
					
						
							|  |  |  |                                    pagesize); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Returns a target page of memory that can be mapped at a later point in time | 
					
						
							|  |  |  |  * using postcopy_place_page | 
					
						
							|  |  |  |  * The same address is used repeatedly, postcopy_place_page just takes the | 
					
						
							|  |  |  |  * backing page away. | 
					
						
							|  |  |  |  * Returns: Pointer to allocated page | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | void *postcopy_get_tmp_page(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (!mis->postcopy_tmp_page) { | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  |         mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  |                              PROT_READ | PROT_WRITE, MAP_PRIVATE | | 
					
						
							|  |  |  |                              MAP_ANONYMOUS, -1, 0); | 
					
						
							| 
									
										
										
										
											2016-07-29 12:48:25 +03:00
										 |  |  |         if (mis->postcopy_tmp_page == MAP_FAILED) { | 
					
						
							|  |  |  |             mis->postcopy_tmp_page = NULL; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  |             error_report("%s: %s", __func__, strerror(errno)); | 
					
						
							|  |  |  |             return NULL; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return mis->postcopy_tmp_page; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | #else
 | 
					
						
							|  |  |  | /* No target OS support, stubs just fail */ | 
					
						
							|  |  |  | bool postcopy_ram_supported_by_host(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     error_report("%s: No OS support", __func__); | 
					
						
							|  |  |  |     return false; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:03 +00:00
										 |  |  | int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     error_report("postcopy_ram_incoming_init: No OS support"); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     assert(0); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:20 +00:00
										 |  |  | int postcopy_ram_prepare_discard(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     assert(0); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:04 +00:00
										 |  |  | int postcopy_ram_enable_notify(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     assert(0); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  | int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, | 
					
						
							|  |  |  |                         size_t pagesize) | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     assert(0); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-24 18:28:35 +00:00
										 |  |  | int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, | 
					
						
							|  |  |  |                         size_t pagesize) | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:10 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     assert(0); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void *postcopy_get_tmp_page(MigrationIncomingState *mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     assert(0); | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:10:55 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:02 +00:00
										 |  |  | /* ------------------------------------------------------------------------- */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * postcopy_discard_send_init: Called at the start of each RAMBlock before | 
					
						
							|  |  |  |  *   asking to discard individual ranges. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * @ms: The current migration state. | 
					
						
							|  |  |  |  * @offset: the bitmap offset of the named RAMBlock in the migration | 
					
						
							|  |  |  |  *   bitmap. | 
					
						
							|  |  |  |  * @name: RAMBlock that discards will operate on. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * returns: a new PDS. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, | 
					
						
							|  |  |  |                                                  const char *name) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (res) { | 
					
						
							|  |  |  |         res->ramblock_name = name; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return res; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * postcopy_discard_send_range: Called by the bitmap code for each chunk to | 
					
						
							|  |  |  |  *   discard. May send a discard message, may just leave it queued to | 
					
						
							|  |  |  |  *   be sent later. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * @ms: Current migration state. | 
					
						
							|  |  |  |  * @pds: Structure initialised by postcopy_discard_send_init(). | 
					
						
							|  |  |  |  * @start,@length: a range of pages in the migration bitmap in the | 
					
						
							|  |  |  |  *   RAM block passed to postcopy_discard_send_init() (length=1 is one page) | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, | 
					
						
							|  |  |  |                                 unsigned long start, unsigned long length) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2017-03-21 09:09:14 +01:00
										 |  |  |     size_t tp_size = qemu_target_page_size(); | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:02 +00:00
										 |  |  |     /* Convert to byte offsets within the RAM block */ | 
					
						
							| 
									
										
										
										
											2017-03-22 15:18:04 +01:00
										 |  |  |     pds->start_list[pds->cur_entry] = start  * tp_size; | 
					
						
							| 
									
										
										
										
											2017-03-21 09:09:14 +01:00
										 |  |  |     pds->length_list[pds->cur_entry] = length * tp_size; | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:02 +00:00
										 |  |  |     trace_postcopy_discard_send_range(pds->ramblock_name, start, length); | 
					
						
							|  |  |  |     pds->cur_entry++; | 
					
						
							|  |  |  |     pds->nsentwords++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) { | 
					
						
							|  |  |  |         /* Full set, ship it! */ | 
					
						
							| 
									
										
										
										
											2016-01-15 11:37:42 +08:00
										 |  |  |         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file, | 
					
						
							|  |  |  |                                               pds->ramblock_name, | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:02 +00:00
										 |  |  |                                               pds->cur_entry, | 
					
						
							|  |  |  |                                               pds->start_list, | 
					
						
							|  |  |  |                                               pds->length_list); | 
					
						
							|  |  |  |         pds->nsentcmds++; | 
					
						
							|  |  |  |         pds->cur_entry = 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * postcopy_discard_send_finish: Called at the end of each RAMBlock by the | 
					
						
							|  |  |  |  * bitmap code. Sends any outstanding discard messages, frees the PDS | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * @ms: Current migration state. | 
					
						
							|  |  |  |  * @pds: Structure initialised by postcopy_discard_send_init(). | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     /* Anything unsent? */ | 
					
						
							|  |  |  |     if (pds->cur_entry) { | 
					
						
							| 
									
										
										
										
											2016-01-15 11:37:42 +08:00
										 |  |  |         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file, | 
					
						
							|  |  |  |                                               pds->ramblock_name, | 
					
						
							| 
									
										
										
										
											2015-11-05 18:11:02 +00:00
										 |  |  |                                               pds->cur_entry, | 
					
						
							|  |  |  |                                               pds->start_list, | 
					
						
							|  |  |  |                                               pds->length_list); | 
					
						
							|  |  |  |         pds->nsentcmds++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords, | 
					
						
							|  |  |  |                                        pds->nsentcmds); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     g_free(pds); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2017-04-24 16:50:35 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Current state of incoming postcopy; note this is not part of | 
					
						
							|  |  |  |  * MigrationIncomingState since it's state is used during cleanup | 
					
						
							|  |  |  |  * at the end as MIS is being freed. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static PostcopyState incoming_postcopy_state; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PostcopyState  postcopy_state_get(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return atomic_mb_read(&incoming_postcopy_state); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Set the state and return the old state */ | 
					
						
							|  |  |  | PostcopyState postcopy_state_set(PostcopyState new_state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return atomic_xchg(&incoming_postcopy_state, new_state); | 
					
						
							|  |  |  | } |