| 
									
										
										
										
											2016-10-27 14:42:52 +08:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | 
					
						
							|  |  |  |  * (a.k.a. Fault Tolerance or Continuous Replication) | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | 
					
						
							|  |  |  |  * Copyright (c) 2016 FUJITSU LIMITED | 
					
						
							|  |  |  |  * Copyright (c) 2016 Intel Corporation | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This work is licensed under the terms of the GNU GPL, version 2 or | 
					
						
							|  |  |  |  * later.  See the COPYING file in the top-level directory. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "qemu/osdep.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | #include "sysemu/sysemu.h"
 | 
					
						
							| 
									
										
										
										
											2018-02-01 12:18:31 +01:00
										 |  |  | #include "qapi/error.h"
 | 
					
						
							| 
									
										
										
										
											2018-02-11 10:36:01 +01:00
										 |  |  | #include "qapi/qapi-commands-migration.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-24 20:07:27 +02:00
										 |  |  | #include "migration.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-20 18:52:18 +02:00
										 |  |  | #include "qemu-file.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-20 14:48:46 +02:00
										 |  |  | #include "savevm.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:52 +08:00
										 |  |  | #include "migration/colo.h"
 | 
					
						
							| 
									
										
										
										
											2017-04-21 14:31:22 +02:00
										 |  |  | #include "block.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  | #include "io/channel-buffer.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | #include "trace.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  | #include "qemu/error-report.h"
 | 
					
						
							| 
									
										
										
											
												Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h).  It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed.  Touching it now
recompiles only some 1700 objects.  For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800.  For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
											
										 
											2019-08-12 07:23:50 +02:00
										 |  |  | #include "qemu/main-loop.h"
 | 
					
						
							| 
									
										
										
										
											2019-08-12 07:23:46 +02:00
										 |  |  | #include "qemu/rcu.h"
 | 
					
						
							| 
									
										
										
											
												COLO: Add 'x-colo-lost-heartbeat' command to trigger failover
We leave users to choose whatever heartbeat solution they want,
if the heartbeat is lost, or other errors they detect, they can use
experimental command 'x_colo_lost_heartbeat' to tell COLO to do failover,
COLO will do operations accordingly.
For example, if the command is sent to the Primary side,
the Primary side will exit COLO mode, does cleanup work,
and then, PVM will take over the service work. If sent to the Secondary side,
the Secondary side will run failover work, then takes over PVM's service work.
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Amit Shah <amit@amitshah.net>
											
										 
											2016-10-27 14:43:03 +08:00
										 |  |  | #include "migration/failover.h"
 | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:10 +08:00
										 |  |  | #include "migration/ram.h"
 | 
					
						
							| 
									
										
										
										
											2021-05-17 07:36:56 -04:00
										 |  |  | #include "block/replication.h"
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  | #include "net/colo-compare.h"
 | 
					
						
							|  |  |  | #include "net/colo.h"
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  | #include "block/block.h"
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  | #include "qapi/qapi-events-migration.h"
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:54 +08:00
										 |  |  | #include "sysemu/cpus.h"
 | 
					
						
							| 
									
										
										
										
											2019-08-12 07:23:59 +02:00
										 |  |  | #include "sysemu/runstate.h"
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:58 +08:00
										 |  |  | #include "net/filter.h"
 | 
					
						
							| 
									
										
										
										
											2023-03-01 21:18:45 +01:00
										 |  |  | #include "options.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:52 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:44 +08:00
										 |  |  | static bool vmstate_loading; | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  | static Notifier packets_compare_notifier; | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:44 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:33 +08:00
										 |  |  | /* User need to know colo mode after COLO failover */ | 
					
						
							|  |  |  | static COLOMode last_colo_mode; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  | #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | bool migration_in_colo_state(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationState *s = migrate_get_current(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (s->state == MIGRATION_STATUS_COLO); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  | bool migration_incoming_in_colo_state(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = migration_incoming_get_current(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return mis && (mis->state == MIGRATION_STATUS_COLO); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  | static bool colo_runstate_is_stopped(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-28 22:49:20 +03:00
										 |  |  | static void colo_checkpoint_notify(void *opaque) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationState *s = opaque; | 
					
						
							|  |  |  |     int64_t next_notify_time; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     qemu_event_set(&s->colo_checkpoint_event); | 
					
						
							|  |  |  |     s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); | 
					
						
							|  |  |  |     next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); | 
					
						
							|  |  |  |     timer_mod(s->colo_delay_timer, next_notify_time); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void colo_checkpoint_delay_set(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (migration_in_colo_state()) { | 
					
						
							|  |  |  |         colo_checkpoint_notify(migrate_get_current()); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  | static void secondary_vm_do_failover(void) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-01 10:12:26 +08:00
										 |  |  | /* COLO needs enable block-replication */ | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |     int old_state; | 
					
						
							|  |  |  |     MigrationIncomingState *mis = migration_incoming_get_current(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     Error *local_err = NULL; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:44 +08:00
										 |  |  |     /* Can not do failover during the process of VM's loading VMstate, Or
 | 
					
						
							|  |  |  |      * it will break the secondary VM. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (vmstate_loading) { | 
					
						
							|  |  |  |         old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, | 
					
						
							|  |  |  |                         FAILOVER_STATUS_RELAUNCH); | 
					
						
							|  |  |  |         if (old_state != FAILOVER_STATUS_ACTIVE) { | 
					
						
							|  |  |  |             error_report("Unknown error while do failover for secondary VM," | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |                          "old_state: %s", FailoverStatus_str(old_state)); | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:44 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, | 
					
						
							|  |  |  |                       MIGRATION_STATUS_COMPLETED); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     replication_stop_all(true, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_report_err(local_err); | 
					
						
							| 
									
										
										
										
											2020-03-24 18:36:28 +03:00
										 |  |  |         local_err = NULL; | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:58 +08:00
										 |  |  |     /* Notify all filters of all NIC to do checkpoint */ | 
					
						
							|  |  |  |     colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_report_err(local_err); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |     if (!autostart) { | 
					
						
							|  |  |  |         error_report("\"-S\" qemu option will be ignored in secondary side"); | 
					
						
							|  |  |  |         /* recover runstate to normal migration finish state */ | 
					
						
							|  |  |  |         autostart = true; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Make sure COLO incoming thread not block in recv or send, | 
					
						
							|  |  |  |      * If mis->from_src_file and mis->to_src_file use the same fd, | 
					
						
							|  |  |  |      * The second shutdown() will return -1, we ignore this value, | 
					
						
							|  |  |  |      * It is harmless. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (mis->from_src_file) { | 
					
						
							|  |  |  |         qemu_file_shutdown(mis->from_src_file); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (mis->to_src_file) { | 
					
						
							|  |  |  |         qemu_file_shutdown(mis->to_src_file); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, | 
					
						
							|  |  |  |                                    FAILOVER_STATUS_COMPLETED); | 
					
						
							|  |  |  |     if (old_state != FAILOVER_STATUS_ACTIVE) { | 
					
						
							|  |  |  |         error_report("Incorrect state (%s) while doing failover for " | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |                      "secondary VM", FailoverStatus_str(old_state)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /* Notify COLO incoming thread that failover work is finished */ | 
					
						
							|  |  |  |     qemu_sem_post(&mis->colo_incoming_sem); | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:30 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |     /* For Secondary VM, jump to incoming co */ | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:39 +03:00
										 |  |  |     if (mis->colo_incoming_co) { | 
					
						
							|  |  |  |         qemu_coroutine_enter(mis->colo_incoming_co); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  | static void primary_vm_do_failover(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationState *s = migrate_get_current(); | 
					
						
							|  |  |  |     int old_state; | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     Error *local_err = NULL; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     migrate_set_state(&s->state, MIGRATION_STATUS_COLO, | 
					
						
							|  |  |  |                       MIGRATION_STATUS_COMPLETED); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:59 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * kick COLO thread which might wait at | 
					
						
							|  |  |  |      * qemu_sem_wait(&s->colo_checkpoint_sem). | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-11-01 15:56:59 +08:00
										 |  |  |     colo_checkpoint_notify(s); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Wake up COLO thread which may blocked in recv() or send(), | 
					
						
							|  |  |  |      * The s->rp_state.from_dst_file and s->to_dst_file may use the | 
					
						
							|  |  |  |      * same fd, but we still shutdown the fd for twice, it is harmless. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (s->to_dst_file) { | 
					
						
							|  |  |  |         qemu_file_shutdown(s->to_dst_file); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (s->rp_state.from_dst_file) { | 
					
						
							|  |  |  |         qemu_file_shutdown(s->rp_state.from_dst_file); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, | 
					
						
							|  |  |  |                                    FAILOVER_STATUS_COMPLETED); | 
					
						
							|  |  |  |     if (old_state != FAILOVER_STATUS_ACTIVE) { | 
					
						
							|  |  |  |         error_report("Incorrect state (%s) while doing failover for Primary VM", | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |                      FailoverStatus_str(old_state)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     replication_stop_all(true, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_report_err(local_err); | 
					
						
							|  |  |  |         local_err = NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /* Notify COLO thread that failover work is finished */ | 
					
						
							|  |  |  |     qemu_sem_post(&s->colo_exit_sem); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:47 +08:00
										 |  |  | COLOMode get_colo_mode(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (migration_in_colo_state()) { | 
					
						
							|  |  |  |         return COLO_MODE_PRIMARY; | 
					
						
							|  |  |  |     } else if (migration_incoming_in_colo_state()) { | 
					
						
							|  |  |  |         return COLO_MODE_SECONDARY; | 
					
						
							|  |  |  |     } else { | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:52 +08:00
										 |  |  |         return COLO_MODE_NONE; | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:47 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-04-26 17:07:28 +08:00
										 |  |  | void colo_do_failover(void) | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  | { | 
					
						
							|  |  |  |     /* Make sure VM stopped while failover happened. */ | 
					
						
							|  |  |  |     if (!colo_runstate_is_stopped()) { | 
					
						
							|  |  |  |         vm_stop_force_state(RUN_STATE_COLO); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-01 15:57:04 +08:00
										 |  |  |     switch (last_colo_mode = get_colo_mode()) { | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:32 +08:00
										 |  |  |     case COLO_MODE_PRIMARY: | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |         primary_vm_do_failover(); | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:32 +08:00
										 |  |  |         break; | 
					
						
							|  |  |  |     case COLO_MODE_SECONDARY: | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |         secondary_vm_do_failover(); | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:32 +08:00
										 |  |  |         break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |         error_report("colo_do_failover failed because the colo mode" | 
					
						
							|  |  |  |                      " could not be obtained"); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-28 11:01:56 -08:00
										 |  |  | void qmp_xen_set_replication(bool enable, bool primary, | 
					
						
							|  |  |  |                              bool has_failover, bool failover, | 
					
						
							|  |  |  |                              Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     ReplicationMode mode = primary ? | 
					
						
							|  |  |  |                            REPLICATION_MODE_PRIMARY : | 
					
						
							|  |  |  |                            REPLICATION_MODE_SECONDARY; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (has_failover && enable) { | 
					
						
							|  |  |  |         error_setg(errp, "Parameter 'failover' is only for" | 
					
						
							|  |  |  |                    " stopping replication"); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (enable) { | 
					
						
							|  |  |  |         replication_start_all(mode, errp); | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         if (!has_failover) { | 
					
						
							|  |  |  |             failover = NULL; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         replication_stop_all(failover, failover ? NULL : errp); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-25 10:46:04 +08:00
										 |  |  | ReplicationStatus *qmp_query_xen_replication_status(Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     Error *err = NULL; | 
					
						
							|  |  |  |     ReplicationStatus *s = g_new0(ReplicationStatus, 1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     replication_get_error_all(&err); | 
					
						
							|  |  |  |     if (err) { | 
					
						
							|  |  |  |         s->error = true; | 
					
						
							|  |  |  |         s->desc = g_strdup(error_get_pretty(err)); | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         s->error = false; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     error_free(err); | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void qmp_xen_colo_do_checkpoint(Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2020-04-22 15:07:16 +02:00
										 |  |  |     Error *err = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     replication_do_checkpoint_all(&err); | 
					
						
							|  |  |  |     if (err) { | 
					
						
							|  |  |  |         error_propagate(errp, err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2019-06-10 00:44:33 +08:00
										 |  |  |     /* Notify all filters of all NIC to do checkpoint */ | 
					
						
							|  |  |  |     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp); | 
					
						
							| 
									
										
										
										
											2017-02-25 10:46:04 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:53 +08:00
										 |  |  | COLOStatus *qmp_query_colo_status(Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     COLOStatus *s = g_new0(COLOStatus, 1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     s->mode = get_colo_mode(); | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:33 +08:00
										 |  |  |     s->last_mode = last_colo_mode; | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     switch (failover_get_state()) { | 
					
						
							|  |  |  |     case FAILOVER_STATUS_NONE: | 
					
						
							|  |  |  |         s->reason = COLO_EXIT_REASON_NONE; | 
					
						
							|  |  |  |         break; | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:30 +08:00
										 |  |  |     case FAILOVER_STATUS_COMPLETED: | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:53 +08:00
										 |  |  |         s->reason = COLO_EXIT_REASON_REQUEST; | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     default: | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:31 +08:00
										 |  |  |         if (migration_in_colo_state()) { | 
					
						
							|  |  |  |             s->reason = COLO_EXIT_REASON_PROCESSING; | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             s->reason = COLO_EXIT_REASON_ERROR; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:53 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return s; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | static void colo_send_message(QEMUFile *f, COLOMessage msg, | 
					
						
							|  |  |  |                               Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (msg >= COLO_MESSAGE__MAX) { | 
					
						
							|  |  |  |         error_setg(errp, "%s: Invalid message", __func__); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     qemu_put_be32(f, msg); | 
					
						
							| 
									
										
										
										
											2023-10-25 11:11:17 +02:00
										 |  |  |     ret = qemu_fflush(f); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         error_setg_errno(errp, -ret, "Can't send COLO message"); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |     trace_colo_send_message(COLOMessage_str(msg)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  | static void colo_send_message_value(QEMUFile *f, COLOMessage msg, | 
					
						
							|  |  |  |                                     uint64_t value, Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     colo_send_message(f, msg, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     qemu_put_be64(f, value); | 
					
						
							| 
									
										
										
										
											2023-10-25 11:11:17 +02:00
										 |  |  |     ret = qemu_fflush(f); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         error_setg_errno(errp, -ret, "Failed to send value for message:%s", | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |                          COLOMessage_str(msg)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     COLOMessage msg; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     msg = qemu_get_be32(f); | 
					
						
							|  |  |  |     ret = qemu_file_get_error(f); | 
					
						
							|  |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         error_setg_errno(errp, -ret, "Can't receive COLO message"); | 
					
						
							|  |  |  |         return msg; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (msg >= COLO_MESSAGE__MAX) { | 
					
						
							|  |  |  |         error_setg(errp, "%s: Invalid message", __func__); | 
					
						
							|  |  |  |         return msg; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |     trace_colo_receive_message(COLOMessage_str(msg)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     return msg; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, | 
					
						
							|  |  |  |                                        Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     COLOMessage msg; | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     msg = colo_receive_message(f, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (msg != expect_msg) { | 
					
						
							|  |  |  |         error_setg(errp, "Unexpected COLO message %d, expected %d", | 
					
						
							|  |  |  |                           msg, expect_msg); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:00 +08:00
										 |  |  | static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, | 
					
						
							|  |  |  |                                            Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  |     uint64_t value; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     colo_receive_check_message(f, expect_msg, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     value = qemu_get_be64(f); | 
					
						
							|  |  |  |     ret = qemu_file_get_error(f); | 
					
						
							|  |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", | 
					
						
							| 
									
										
										
										
											2017-08-24 10:46:08 +02:00
										 |  |  |                          COLOMessage_str(expect_msg)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:00 +08:00
										 |  |  |     } | 
					
						
							|  |  |  |     return value; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  | static int colo_do_checkpoint_transaction(MigrationState *s, | 
					
						
							|  |  |  |                                           QIOChannelBuffer *bioc, | 
					
						
							|  |  |  |                                           QEMUFile *fb) | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | { | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     int ret = -1; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, | 
					
						
							|  |  |  |                       &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     colo_receive_check_message(s->rp_state.from_dst_file, | 
					
						
							|  |  |  |                     COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     /* Reset channel-buffer directly */ | 
					
						
							|  |  |  |     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | 
					
						
							|  |  |  |     bioc->usage = 0; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |     if (failover_get_state() != FAILOVER_STATUS_NONE) { | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     vm_stop_force_state(RUN_STATE_COLO); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     trace_colo_vm_state_change("run", "stop"); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Failover request bh could be called after vm_stop_force_state(), | 
					
						
							|  |  |  |      * So we need check failover_request_is_active() again. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (failover_get_state() != FAILOVER_STATUS_NONE) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2018-11-01 10:12:26 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     replication_do_checkpoint_all(&local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:54 +08:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     /* Note: device state is saved into buffer */ | 
					
						
							|  |  |  |     ret = qemu_save_device_state(fb); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:54 +08:00
										 |  |  |     if (ret < 0) { | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-11-09 11:04:54 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (migrate_auto_converge()) { | 
					
						
							|  |  |  |         mig_throttle_counter_reset(); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:54 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Only save VM's live state, which not including device state. | 
					
						
							|  |  |  |      * TODO: We may need a timeout mechanism to prevent COLO process | 
					
						
							|  |  |  |      * to be blocked here. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     qemu_savevm_live_state(s->to_dst_file); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     qemu_fflush(fb); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * We need the size of the VMstate data in Secondary side, | 
					
						
							|  |  |  |      * With which we can decide how much data should be read. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE, | 
					
						
							|  |  |  |                             bioc->usage, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage); | 
					
						
							| 
									
										
										
										
											2023-10-25 11:11:17 +02:00
										 |  |  |     ret = qemu_fflush(s->to_dst_file); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     colo_receive_check_message(s->rp_state.from_dst_file, | 
					
						
							|  |  |  |                        COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-11 13:11:01 +02:00
										 |  |  |     qemu_event_reset(&s->colo_checkpoint_event); | 
					
						
							|  |  |  |     colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     colo_receive_check_message(s->rp_state.from_dst_file, | 
					
						
							|  |  |  |                        COLO_MESSAGE_VMSTATE_LOADED, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     ret = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     vm_start(); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     trace_colo_vm_state_change("stop", "run"); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | out: | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_report_err(local_err); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     return ret; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  | static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     colo_checkpoint_notify(data); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | static void colo_process_checkpoint(MigrationState *s) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     QIOChannelBuffer *bioc; | 
					
						
							|  |  |  |     QEMUFile *fb = NULL; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-01 15:57:04 +08:00
										 |  |  |     if (get_colo_mode() != COLO_MODE_PRIMARY) { | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:33 +08:00
										 |  |  |         error_report("COLO mode must be COLO_MODE_PRIMARY"); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:04 +08:00
										 |  |  |     failover_init_state(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  |     s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); | 
					
						
							|  |  |  |     if (!s->rp_state.from_dst_file) { | 
					
						
							|  |  |  |         error_report("Open QEMUFile from_dst_file failed"); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  |     packets_compare_notifier.notify = colo_compare_notify_checkpoint; | 
					
						
							|  |  |  |     colo_compare_register_notifier(&packets_compare_notifier); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Wait for Secondary finish loading VM states and enter COLO | 
					
						
							|  |  |  |      * restore. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     colo_receive_check_message(s->rp_state.from_dst_file, | 
					
						
							|  |  |  |                        COLO_MESSAGE_CHECKPOINT_READY, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); | 
					
						
							| 
									
										
										
										
											2022-06-20 12:02:05 +01:00
										 |  |  |     fb = qemu_file_new_output(QIO_CHANNEL(bioc)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     object_unref(OBJECT(bioc)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  |     vm_start(); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  |     trace_colo_vm_state_change("stop", "run"); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-11 01:41:56 +08:00
										 |  |  |     timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + | 
					
						
							| 
									
										
										
										
											2023-03-02 01:04:55 +01:00
										 |  |  |               migrate_checkpoint_delay()); | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:42 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     while (s->state == MIGRATION_STATUS_COLO) { | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:05 +08:00
										 |  |  |         if (failover_get_state() != FAILOVER_STATUS_NONE) { | 
					
						
							|  |  |  |             error_report("failover request"); | 
					
						
							|  |  |  |             goto out; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:44 +02:00
										 |  |  |         qemu_event_wait(&s->colo_checkpoint_event); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:02 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:59 +08:00
										 |  |  |         if (s->state != MIGRATION_STATUS_COLO) { | 
					
						
							|  |  |  |             goto out; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |         ret = colo_do_checkpoint_transaction(s, bioc, fb); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |         if (ret < 0) { | 
					
						
							|  |  |  |             goto out; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  | out: | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     /* Throw the unreported error message after exited from loop */ | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_report_err(local_err); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:59 +08:00
										 |  |  |     if (fb) { | 
					
						
							|  |  |  |         qemu_fclose(fb); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * There are only two reasons we can get here, some error happened | 
					
						
							|  |  |  |      * or the user triggered failover. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     switch (failover_get_state()) { | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:30 +08:00
										 |  |  |     case FAILOVER_STATUS_COMPLETED: | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  |         qapi_event_send_colo_exit(COLO_MODE_PRIMARY, | 
					
						
							|  |  |  |                                   COLO_EXIT_REASON_REQUEST); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     default: | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:31 +08:00
										 |  |  |         qapi_event_send_colo_exit(COLO_MODE_PRIMARY, | 
					
						
							|  |  |  |                                   COLO_EXIT_REASON_ERROR); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /* Hope this not to be too long to wait here */ | 
					
						
							|  |  |  |     qemu_sem_wait(&s->colo_exit_sem); | 
					
						
							|  |  |  |     qemu_sem_destroy(&s->colo_exit_sem); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * It is safe to unregister notifier after failover finished. | 
					
						
							|  |  |  |      * Besides, colo_delay_timer and colo_checkpoint_sem can't be | 
					
						
							| 
									
										
										
										
											2020-09-17 15:50:21 +08:00
										 |  |  |      * released before unregister notifier, or there will be use-after-free | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  |      * error. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     colo_compare_unregister_notifier(&packets_compare_notifier); | 
					
						
							|  |  |  |     timer_free(s->colo_delay_timer); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:44 +02:00
										 |  |  |     qemu_event_destroy(&s->colo_checkpoint_event); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Must be called after failover BH is completed, | 
					
						
							|  |  |  |      * Or the failover BH may shutdown the wrong fd that | 
					
						
							|  |  |  |      * re-used by other threads after we release here. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  |     if (s->rp_state.from_dst_file) { | 
					
						
							|  |  |  |         qemu_fclose(s->rp_state.from_dst_file); | 
					
						
							| 
									
										
											  
											
												colo: fixed 'Segmentation fault' when the simplex mode PVM poweroff
The GDB statck is as follows:
Program terminated with signal SIGSEGV, Segmentation fault.
0  object_class_dynamic_cast (class=0x55c8f5d2bf50, typename=0x55c8f2f7379e "qio-channel") at qom/object.c:832
         if (type->class->interfaces &&
[Current thread is 1 (Thread 0x7f756e97eb00 (LWP 1811577))]
(gdb) bt
0  object_class_dynamic_cast (class=0x55c8f5d2bf50, typename=0x55c8f2f7379e "qio-channel") at qom/object.c:832
1  0x000055c8f2c3dd14 in object_dynamic_cast (obj=0x55c8f543ac00, typename=0x55c8f2f7379e "qio-channel") at qom/object.c:763
2  0x000055c8f2c3ddce in object_dynamic_cast_assert (obj=0x55c8f543ac00, typename=0x55c8f2f7379e "qio-channel",
    file=0x55c8f2f73780 "migration/qemu-file-channel.c", line=117, func=0x55c8f2f73800 <__func__.18724> "channel_shutdown") at qom/object.c:786
3  0x000055c8f2bbc6ac in channel_shutdown (opaque=0x55c8f543ac00, rd=true, wr=true, errp=0x0) at migration/qemu-file-channel.c:117
4  0x000055c8f2bba56e in qemu_file_shutdown (f=0x7f7558070f50) at migration/qemu-file.c:67
5  0x000055c8f2ba5373 in migrate_fd_cancel (s=0x55c8f4ccf3f0) at migration/migration.c:1699
6  0x000055c8f2ba1992 in migration_shutdown () at migration/migration.c:187
7  0x000055c8f29a5b77 in main (argc=69, argv=0x7fff3e9e8c08, envp=0x7fff3e9e8e38) at vl.c:4512
The root cause is that we still want to shutdown the from_dst_file in
migrate_fd_cancel() after qemu_close in colo_process_checkpoint().
So, we should set the s->rp_state.from_dst_file = NULL after
qemu_close().
Signed-off-by: Lei Rao <lei.rao@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
											
										 
											2021-11-01 15:57:02 +08:00
										 |  |  |         s->rp_state.from_dst_file = NULL; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void migrate_start_colo_process(MigrationState *s) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:44 +02:00
										 |  |  |     qemu_event_init(&s->colo_checkpoint_event, false); | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:42 +08:00
										 |  |  |     s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST, | 
					
						
							|  |  |  |                                 colo_checkpoint_notify, s); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     qemu_sem_init(&s->colo_exit_sem, 0); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  |     colo_process_checkpoint(s); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:54 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  | static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, | 
					
						
							|  |  |  |                       QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint64_t total_size; | 
					
						
							|  |  |  |     uint64_t value; | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |     vm_stop_force_state(RUN_STATE_COLO); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2021-12-15 01:19:45 +08:00
										 |  |  |     trace_colo_vm_state_change("run", "stop"); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* FIXME: This is unnecessary for periodic checkpoint mode */ | 
					
						
							|  |  |  |     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, | 
					
						
							|  |  |  |                  &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     colo_receive_check_message(mis->from_src_file, | 
					
						
							|  |  |  |                        COLO_MESSAGE_VMSTATE_SEND, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:48 +02:00
										 |  |  |     cpu_synchronize_all_states(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |     ret = qemu_loadvm_state_main(mis->from_src_file, mis); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         error_setg(errp, "Load VM's live state (ram) error"); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     value = colo_receive_message_value(mis->from_src_file, | 
					
						
							|  |  |  |                              COLO_MESSAGE_VMSTATE_SIZE, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Read VM device state data into channel buffer, | 
					
						
							|  |  |  |      * It's better to re-use the memory allocated. | 
					
						
							|  |  |  |      * Here we need to handle the channel buffer directly. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     if (value > bioc->capacity) { | 
					
						
							|  |  |  |         bioc->capacity = value; | 
					
						
							|  |  |  |         bioc->data = g_realloc(bioc->data, bioc->capacity); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); | 
					
						
							|  |  |  |     if (total_size != value) { | 
					
						
							|  |  |  |         error_setg(errp, "Got %" PRIu64 " VMState data, less than expected" | 
					
						
							|  |  |  |                     " %" PRIu64, total_size, value); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     bioc->usage = total_size; | 
					
						
							|  |  |  |     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, | 
					
						
							|  |  |  |                  &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |     vmstate_loading = true; | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:51 +02:00
										 |  |  |     colo_flush_ram_cache(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |     ret = qemu_load_device_state(fb); | 
					
						
							|  |  |  |     if (ret < 0) { | 
					
						
							|  |  |  |         error_setg(errp, "COLO: load device state failed"); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:55 +02:00
										 |  |  |         vmstate_loading = false; | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     replication_get_error_all(&local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:55 +02:00
										 |  |  |         vmstate_loading = false; | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* discard colo disk buffer */ | 
					
						
							|  |  |  |     replication_do_checkpoint_all(&local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:55 +02:00
										 |  |  |         vmstate_loading = false; | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     /* Notify all filters of all NIC to do checkpoint */ | 
					
						
							|  |  |  |     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:55 +02:00
										 |  |  |         vmstate_loading = false; | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     vmstate_loading = false; | 
					
						
							|  |  |  |     vm_start(); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2021-12-15 01:19:45 +08:00
										 |  |  |     trace_colo_vm_state_change("stop", "run"); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, | 
					
						
							|  |  |  |                  &local_err); | 
					
						
							| 
									
										
										
										
											2020-07-22 10:40:47 +02:00
										 |  |  |     error_propagate(errp, local_err); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void colo_wait_handle_message(MigrationIncomingState *mis, | 
					
						
							|  |  |  |                 QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp) | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  | { | 
					
						
							|  |  |  |     COLOMessage msg; | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |     msg = colo_receive_message(mis->from_src_file, &local_err); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_propagate(errp, local_err); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch (msg) { | 
					
						
							|  |  |  |     case COLO_MESSAGE_CHECKPOINT_REQUEST: | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |         colo_incoming_process_checkpoint(mis, fb, bioc, errp); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |         break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |         error_setg(errp, "Got unknown COLO message: %d", msg); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-11 10:11:33 +08:00
										 |  |  | void colo_shutdown(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = NULL; | 
					
						
							|  |  |  |     MigrationState *s = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch (get_colo_mode()) { | 
					
						
							|  |  |  |     case COLO_MODE_PRIMARY: | 
					
						
							|  |  |  |         s = migrate_get_current(); | 
					
						
							|  |  |  |         qemu_event_set(&s->colo_checkpoint_event); | 
					
						
							|  |  |  |         qemu_sem_post(&s->colo_exit_sem); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     case COLO_MODE_SECONDARY: | 
					
						
							|  |  |  |         mis = migration_incoming_get_current(); | 
					
						
							|  |  |  |         qemu_sem_post(&mis->colo_incoming_sem); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:40 +03:00
										 |  |  | static void *colo_process_incoming_thread(void *opaque) | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = opaque; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:00 +08:00
										 |  |  |     QEMUFile *fb = NULL; | 
					
						
							|  |  |  |     QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     Error *local_err = NULL; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-06 21:29:29 +08:00
										 |  |  |     rcu_register_thread(); | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     qemu_sem_init(&mis->colo_incoming_sem, 0); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  |     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, | 
					
						
							|  |  |  |                       MIGRATION_STATUS_COLO); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-01 15:57:04 +08:00
										 |  |  |     if (get_colo_mode() != COLO_MODE_SECONDARY) { | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:33 +08:00
										 |  |  |         error_report("COLO mode must be COLO_MODE_SECONDARY"); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:04 +08:00
										 |  |  |     failover_init_state(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  |     mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); | 
					
						
							|  |  |  |     if (!mis->to_src_file) { | 
					
						
							|  |  |  |         error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Note: the communication between Primary side and Secondary side | 
					
						
							|  |  |  |      * should be sequential, we set the fd to unblocked in migration incoming | 
					
						
							|  |  |  |      * coroutine, and here we are in the COLO incoming thread, so it is ok to | 
					
						
							|  |  |  |      * set the fd back to blocked. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     qemu_file_set_blocking(mis->from_src_file, true); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:10 +08:00
										 |  |  |     colo_incoming_start_dirty_log(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:00 +08:00
										 |  |  |     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); | 
					
						
							| 
									
										
										
										
											2022-06-20 12:02:05 +01:00
										 |  |  |     fb = qemu_file_new_input(QIO_CHANNEL(bioc)); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:00 +08:00
										 |  |  |     object_unref(OBJECT(bioc)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |     replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |         bql_unlock(); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:46 +08:00
										 |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  |     vm_start(); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2021-12-15 01:19:45 +08:00
										 |  |  |     trace_colo_vm_state_change("stop", "run"); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:45 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, | 
					
						
							|  |  |  |                       &local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         goto out; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while (mis->state == MIGRATION_STATUS_COLO) { | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |         colo_wait_handle_message(mis, fb, bioc, &local_err); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |         if (local_err) { | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |             error_report_err(local_err); | 
					
						
							|  |  |  |             break; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-05-11 13:10:55 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { | 
					
						
							|  |  |  |             failover_set_state(FAILOVER_STATUS_RELAUNCH, | 
					
						
							|  |  |  |                             FAILOVER_STATUS_NONE); | 
					
						
							|  |  |  |             failover_request_active(NULL); | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:06 +08:00
										 |  |  |         if (failover_get_state() != FAILOVER_STATUS_NONE) { | 
					
						
							|  |  |  |             error_report("failover request"); | 
					
						
							| 
									
										
										
										
											2020-02-24 14:54:08 +08:00
										 |  |  |             break; | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:57 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:56 +08:00
										 |  |  | out: | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:31 +08:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * There are only two reasons we can get here, some error happened | 
					
						
							|  |  |  |      * or the user triggered failover. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  |     switch (failover_get_state()) { | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:30 +08:00
										 |  |  |     case FAILOVER_STATUS_COMPLETED: | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  |         qapi_event_send_colo_exit(COLO_MODE_SECONDARY, | 
					
						
							|  |  |  |                                   COLO_EXIT_REASON_REQUEST); | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     default: | 
					
						
							| 
									
										
										
										
											2019-03-22 18:13:31 +08:00
										 |  |  |         qapi_event_send_colo_exit(COLO_MODE_SECONDARY, | 
					
						
							|  |  |  |                                   COLO_EXIT_REASON_ERROR); | 
					
						
							| 
									
										
										
										
											2018-09-03 12:38:51 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-27 14:43:00 +08:00
										 |  |  |     if (fb) { | 
					
						
							|  |  |  |         qemu_fclose(fb); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-17 20:57:43 +08:00
										 |  |  |     /* Hope this not to be too long to loop here */ | 
					
						
							|  |  |  |     qemu_sem_wait(&mis->colo_incoming_sem); | 
					
						
							|  |  |  |     qemu_sem_destroy(&mis->colo_incoming_sem); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-06 21:29:29 +08:00
										 |  |  |     rcu_unregister_thread(); | 
					
						
							| 
									
										
										
										
											2016-10-27 14:42:55 +08:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:40 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  | int coroutine_fn colo_incoming_co(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     MigrationIncomingState *mis = migration_incoming_get_current(); | 
					
						
							|  |  |  |     Error *local_err = NULL; | 
					
						
							|  |  |  |     QemuThread th; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     assert(bql_locked()); | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:40 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (!migration_incoming_colo_enabled()) { | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Make sure all file formats throw away their mutable metadata */ | 
					
						
							|  |  |  |     bdrv_activate_all(&local_err); | 
					
						
							|  |  |  |     if (local_err) { | 
					
						
							|  |  |  |         error_report_err(local_err); | 
					
						
							|  |  |  |         return -EINVAL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread, | 
					
						
							|  |  |  |                        mis, QEMU_THREAD_JOINABLE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     mis->colo_incoming_co = qemu_coroutine_self(); | 
					
						
							|  |  |  |     qemu_coroutine_yield(); | 
					
						
							|  |  |  |     mis->colo_incoming_co = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_unlock(); | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:40 +03:00
										 |  |  |     /* Wait checkpoint incoming thread exit before free resource */ | 
					
						
							|  |  |  |     qemu_thread_join(&th); | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:25 -05:00
										 |  |  |     bql_lock(); | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:40 +03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-02 10:35:28 -05:00
										 |  |  |     /* We hold the global BQL, so it is safe here */ | 
					
						
							| 
									
										
										
										
											2023-05-15 16:06:40 +03:00
										 |  |  |     colo_release_ram_cache(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } |