Improve check_device_status to handle HA cases. In an HA environment, the xenstore status sometimes changes without ev.wait() receiving the signal; ev.wait() then blocks until the timeout expires, and an incorrect device status is returned. To fix this problem, we no longer depend on the result of ev.wait(), but read xenstore directly to get the correct device status. diff -r ce65e0e03a57 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Fri Aug 27 16:53:00 2010 +0800 +++ b/tools/python/xen/xend/server/DevController.py Fri Aug 27 17:13:32 2010 +0800 @@ -149,7 +149,10 @@ (status, err) = self.waitForBackend(devid) if status == Timeout: - self.destroyDevice(devid, False) + #Clean timeout backend resource + dev = self.convertToDeviceNumber(devid) + self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR) + self.destroyDevice(devid, True) raise VmError("Device %s (%s) could not be connected. " "Hotplug scripts not working." % (devid, self.deviceClass)) @@ -554,7 +557,17 @@ xswatch(statusPath, hotplugStatusCallback, ev, result) - ev.wait(DEVICE_CREATE_TIMEOUT) + for i in range(1, 50): + ev.wait(DEVICE_CREATE_TIMEOUT/50) + status = xstransact.Read(statusPath) + if status is not None: + if status == HOTPLUG_STATUS_ERROR: + result['status'] = Error + elif status == HOTPLUG_STATUS_BUSY: + result['status'] = Busy + else: + result['status'] = Connected + break err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE) @@ -571,7 +584,12 @@ xswatch(statusPath, deviceDestroyCallback, ev, result) - ev.wait(DEVICE_DESTROY_TIMEOUT) + for i in range(1, 50): + ev.wait(DEVICE_DESTROY_TIMEOUT/50) + status = xstransact.Read(statusPath) + if status is None: + result['status'] = Disconnected + break return result['status']