commit 5c5df5310f72be4878a71ace47074c54e0d1a27d Author: Jim Fehlig Date: Wed Nov 24 11:48:51 2021 -0700 libxl: Search for virDomainObj in event handler threads libxl can deliver events and invoke callbacks on any application thread calling into libxl. This can cause deadlock in the libvirt libxl driver Thread 19 (Thread 0x7f31411ec700 (LWP 14068) "libvirtd"): #0 0x00007f318520cc7d in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x00007f3185205ed5 in pthread_mutex_lock () from /lib64/libpthread.so.0 #2 0x00007f3189488015 in virMutexLock (m=) at ../../src/util/virthread.c:79 #3 0x00007f3189463f3b in virObjectLock (anyobj=) at ../../src/util/virobject.c:433 #4 0x00007f31894f2f41 in virDomainObjListSearchID (payload=0x7f317400a6d0, name=, data=0x7f31411eaeac) at ../../src/conf/virdomainobjlist.c:105 #5 0x00007f3189437ac5 in virHashSearch (ctable=0x7f3124025a30, iter=iter@entry=0x7f31894f2f30 , data=data@entry=0x7f31411eaeac, name=name@entry=0x0) at ../../src/util/virhash.c:745 #6 0x00007f31894f3919 in virDomainObjListFindByID (doms=0x7f3124025430, id=) at ../../src/conf/virdomainobjlist.c:121 #7 0x00007f3152f292e5 in libxlDomainEventHandler (data=0x7f3124023d80, event=0x7f310c010ae0) at ../../src/libxl/libxl_domain.c:660 #8 0x00007f3152c6ff5d in egc_run_callbacks (egc=egc@entry=0x7f31411eaf50) at libxl_event.c:1427 #9 0x00007f3152c718bd in libxl__egc_cleanup (egc=0x7f31411eaf50) at libxl_event.c:1458 #10 libxl__ao_inprogress (ao=ao@entry=0x7f310c00b8a0, file=file@entry=0x7f3152cce987 "libxl_domain.c", line=line@entry=730, func=func@entry=0x7f3152ccf750 <__func__.22238> "libxl_domain_unpause") at libxl_event.c:2047 #11 0x00007f3152c8c5b8 in libxl_domain_unpause (ctx=0x7f3124015a40, domid=, ao_how=ao_how@entry=0x0) at libxl_domain.c:730 #12 0x00007f3152f2a584 in libxl_domain_unpause_0x041200 (domid=, ctx=) at /usr/include/libxl.h:1756 #13 libxlDomainStart (driver=driver@entry=0x7f3124023d80, vm=vm@entry=0x7f317400a6d0, start_paused=start_paused@entry=false, restore_fd=restore_fd@entry=-1, restore_ver=, restore_ver@entry=2) at ../../src/libxl/libxl_domain.c:1482 #14 0x00007f3152f2a6e3 in libxlDomainStartNew (driver=driver@entry=0x7f3124023d80, vm=vm@entry=0x7f317400a6d0, start_paused=start_paused@entry=false) at ../../src/libxl/libxl_domain.c:1545 #15 0x00007f3152f2a789 in libxlDomainShutdownHandleRestart (driver=0x7f3124023d80, vm=0x7f317400a6d0) at ../../src/libxl/libxl_domain.c:464 #16 0x00007f3152f2a9e4 in libxlDomainShutdownThread (opaque=) at ../../src/libxl/libxl_domain.c:559 #17 0x00007f3189487ee2 in virThreadHelper (data=) at ../../src/util/virthread.c:196 #18 0x00007f3185203539 in start_thread () from /lib64/libpthread.so.0 #19 0x00007f3184f3becf in clone () from /lib64/libc.so.6 Frame 16 runs a thread created to handle domain shutdown processing for domid 28712. In this case the event contained the reboot reason, so the old domain is destroyed and a new one is created by libxlDomainStart new. After starting the domain, it is unpaused by calling libxl_domain_unpause in frame 12. While the thread is running within libxl, libxl takes the opportunity to deliver a pending domain shutdown event for unrelated domid 28710. While searching for the associated virDomainObj by ID, a deadlock is encountered when attempting to lock the virDomainObj for domid 28712, which is already locked since this thread is processing its shutdown event. The deadlock can be avoided by moving the search for a virDomainObj associated with the event domid to the shutdown thread. The same is done for the death thread. Signed-off-by: Jim Fehlig Reviewed-by: Daniel P. Berrangé Reviewed-by: Ján Tomko Index: libvirt-7.10.0/src/libxl/libxl_domain.c =================================================================== --- libvirt-7.10.0.orig/src/libxl/libxl_domain.c +++ libvirt-7.10.0/src/libxl/libxl_domain.c @@ -480,7 +480,6 @@ libxlDomainShutdownHandleRestart(libxlDr struct libxlEventHandlerThreadInfo { libxlDriverPrivate *driver; - virDomainObj *vm; libxl_event *event; }; @@ -489,7 +488,7 @@ static void libxlDomainShutdownThread(void *opaque) { struct libxlEventHandlerThreadInfo *shutdown_info = opaque; - virDomainObj *vm = shutdown_info->vm; + virDomainObj *vm = NULL; libxl_event *ev = shutdown_info->event; libxlDriverPrivate *driver = shutdown_info->driver; virObjectEvent *dom_event = NULL; @@ -499,6 +498,12 @@ libxlDomainShutdownThread(void *opaque) libxl_domain_config_init(&d_config); + vm = virDomainObjListFindByID(driver->domains, ev->domid); + if (!vm) { + /* Nothing to do if we can't find the virDomainObj */ + goto cleanup; + } + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) goto cleanup; @@ -616,12 +621,18 @@ static void libxlDomainDeathThread(void *opaque) { struct libxlEventHandlerThreadInfo *death_info = opaque; - virDomainObj *vm = death_info->vm; + virDomainObj *vm = NULL; libxl_event *ev = death_info->event; libxlDriverPrivate *driver = death_info->driver; virObjectEvent *dom_event = NULL; g_autoptr(libxlDriverConfig) cfg = libxlDriverConfigGet(driver); + vm = virDomainObjListFindByID(driver->domains, ev->domid); + if (!vm) { + /* Nothing to do if we can't find the virDomainObj */ + goto cleanup; + } + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) goto cleanup; @@ -650,7 +661,6 @@ libxlDomainEventHandler(void *data, libx { libxlDriverPrivate *driver = data; libxl_shutdown_reason xl_reason = event->u.domain_shutdown.shutdown_reason; - virDomainObj *vm = NULL; g_autoptr(libxlDriverConfig) cfg = NULL; struct libxlEventHandlerThreadInfo *thread_info = NULL; virThread thread; @@ -671,12 +681,6 @@ libxlDomainEventHandler(void *data, libx if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND) goto cleanup; - vm = virDomainObjListFindByID(driver->domains, event->domid); - if (!vm) { - /* Nothing to do if we can't find the virDomainObj */ - goto cleanup; - } - /* * Start event-specific threads to handle shutdown and death. * They are potentially lengthy operations and we don't want to be @@ -686,7 +690,6 @@ libxlDomainEventHandler(void *data, libx thread_info = g_new0(struct libxlEventHandlerThreadInfo, 1); thread_info->driver = driver; - thread_info->vm = vm; thread_info->event = (libxl_event *)event; thread_name = g_strdup_printf("shutdown-event-%d", event->domid); /* @@ -701,15 +704,14 @@ libxlDomainEventHandler(void *data, libx goto cleanup; } /* - * virDomainObjEndAPI is called in the shutdown thread, where - * libxlEventHandlerThreadInfo and libxl_event are also freed. + * libxlEventHandlerThreadInfo and libxl_event are freed in the + * shutdown thread */ return; } else if (event->type == LIBXL_EVENT_TYPE_DOMAIN_DEATH) { thread_info = g_new0(struct libxlEventHandlerThreadInfo, 1); thread_info->driver = driver; - thread_info->vm = vm; thread_info->event = (libxl_event *)event; thread_name = g_strdup_printf("death-event-%d", event->domid); /* @@ -724,14 +726,13 @@ libxlDomainEventHandler(void *data, libx goto cleanup; } /* - * virDomainObjEndAPI is called in the death thread, where - * libxlEventHandlerThreadInfo and libxl_event are also freed. + * libxlEventHandlerThreadInfo and libxl_event are freed in the + * death thread */ return; } cleanup: - virDomainObjEndAPI(&vm); VIR_FREE(thread_info); cfg = libxlDriverConfigGet(driver); /* Cast away any const */