xen/check_device_status.patch

57 lines
2.3 KiB
Diff
Raw Normal View History

Improve check_device_status to handle HA cases
In an HA environment, the xenstore status sometimes changes without ev.wait()
receiving the signal, so ev.wait() blocks until it times out and an incorrect
device status is returned. To fix this problem, do not depend on the result of
ev.wait(); instead, read xenstore directly to get the correct device status.
Index: xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
@@ -149,7 +149,10 @@ class DevController:
(status, err) = self.waitForBackend(devid)
if status == Timeout:
- self.destroyDevice(devid, False)
+ #Clean timeout backend resource
+ dev = self.convertToDeviceNumber(devid)
+ self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR)
+ self.destroyDevice(devid, True)
raise VmError("Device %s (%s) could not be connected. "
"Hotplug scripts not working." %
(devid, self.deviceClass))
@@ -554,7 +557,17 @@ class DevController:
xswatch(statusPath, hotplugStatusCallback, ev, result)
- ev.wait(DEVICE_CREATE_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_CREATE_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is not None:
+ if status == HOTPLUG_STATUS_ERROR:
+ result['status'] = Error
+ elif status == HOTPLUG_STATUS_BUSY:
+ result['status'] = Busy
+ else:
+ result['status'] = Connected
+ break
err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE)
@@ -571,7 +584,12 @@ class DevController:
xswatch(statusPath, deviceDestroyCallback, ev, result)
- ev.wait(DEVICE_DESTROY_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_DESTROY_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is None:
+ result['status'] = Disconnected
+ break
return result['status']