Improve check_device_status to handle HA cases. In an HA environment, the xenstore status sometimes changes but ev.wait() never receives the signal, so it waits until the timeout expires and an incorrect device status is returned. To fix this problem, we no longer depend on the result of ev.wait(), but read xenstore directly to get the correct device status. Index: xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py =================================================================== --- xen-4.2.0-testing.orig/tools/python/xen/xend/server/DevController.py +++ xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py @@ -149,7 +149,10 @@ class DevController: (status, err) = self.waitForBackend(devid) if status == Timeout: - self.destroyDevice(devid, False) + #Clean timeout backend resource + dev = self.convertToDeviceNumber(devid) + self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR) + self.destroyDevice(devid, True) raise VmError("Device %s (%s) could not be connected. " "Hotplug scripts not working." % (devid, self.deviceClass)) @@ -554,7 +557,17 @@ class DevController: xswatch(statusPath, hotplugStatusCallback, ev, result) - ev.wait(DEVICE_CREATE_TIMEOUT) + for i in range(1, 50): + ev.wait(DEVICE_CREATE_TIMEOUT/50) + status = xstransact.Read(statusPath) + if status is not None: + if status == HOTPLUG_STATUS_ERROR: + result['status'] = Error + elif status == HOTPLUG_STATUS_BUSY: + result['status'] = Busy + else: + result['status'] = Connected + break err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE) @@ -571,7 +584,12 @@ class DevController: xswatch(statusPath, deviceDestroyCallback, ev, result) - ev.wait(DEVICE_DESTROY_TIMEOUT) + for i in range(1, 50): + ev.wait(DEVICE_DESTROY_TIMEOUT/50) + status = xstransact.Read(statusPath) + if status is None: + result['status'] = Disconnected + break return result['status']