xen/check_device_status.patch

56 lines
2.2 KiB
Diff
Raw Normal View History

Improve check_device_status to handle HA cases
In HA environment, sometimes xenstore status has changed but ev.wait() cannot
get the signal, it will wait until timeout, thus incorrect device status is
returned. To fix this problem, we do not depend on ev.wait() result, but read
xenstore directly to get correct device status.
diff -r ce65e0e03a57 tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py Fri Aug 27 16:53:00 2010 +0800
+++ b/tools/python/xen/xend/server/DevController.py Fri Aug 27 17:13:32 2010 +0800
@@ -149,7 +149,10 @@
(status, err) = self.waitForBackend(devid)
if status == Timeout:
- self.destroyDevice(devid, False)
+ #Clean timeout backend resource
+ dev = self.convertToDeviceNumber(devid)
+ self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR)
+ self.destroyDevice(devid, True)
raise VmError("Device %s (%s) could not be connected. "
"Hotplug scripts not working." %
(devid, self.deviceClass))
@@ -554,7 +557,17 @@
xswatch(statusPath, hotplugStatusCallback, ev, result)
- ev.wait(DEVICE_CREATE_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_CREATE_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is not None:
+ if status == HOTPLUG_STATUS_ERROR:
+ result['status'] = Error
+ elif status == HOTPLUG_STATUS_BUSY:
+ result['status'] = Busy
+ else:
+ result['status'] = Connected
+ break
err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE)
@@ -571,7 +584,12 @@
xswatch(statusPath, deviceDestroyCallback, ev, result)
- ev.wait(DEVICE_DESTROY_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_DESTROY_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is None:
+ result['status'] = Disconnected
+ break
return result['status']