Index: xen-4.1.0-testing/tools/examples/xend-config.sxp
===================================================================
--- xen-4.1.0-testing.orig/tools/examples/xend-config.sxp
+++ xen-4.1.0-testing/tools/examples/xend-config.sxp
@@ -321,6 +321,65 @@
 # device assignment could really work properly even after we do this.
 #(pci-passthrough-strict-check yes)
 
+# Domain Locking
+# In a multihost environment, domain locking prevents simultaneously
+# running a domain on more than one host.
+#
+# If enabled, xend will execute an external lock utility (defined below)
+# on each domain start and stop event.  Disabled by default.  Set to yes
+# to enable domain locking.
+#
+#(xend-domain-lock no)
+
+# Path where domain lock is stored if xend-domain-lock is enabled.
+# Note:  This path must be accessible to all VM Servers participating
+#        in domain locking, e.g. by specifying a shared mount point.
+#        Lock is placed in /<xend-domain-lock-path>/<domain-uuid>.
+# Default is /var/lib/xen/images/vm_locks/
+#
+#(xend-domain-lock-path /var/lib/xen/images/vm_locks)
+
+# External locking utility called by xend for acquiring/releasing
+# domain lock.  By default /etc/xen/scripts/domain-lock will be used
+# if xend-domain-lock is set to yes.  Set to path of custom locking
+# utility to override the default.
+#
+# Synopsis of lock-util:
+# lock-util [-l|-u] -n <vm-name> -i <vm-id> -p <phy-host> path
+# -l          Acquire (create) lock
+# -u          Remove lock
+# -n vm-name  Name of domain
+# -i vm-id    Id or UUID of domain
+# -p phy-host Name of physical host (dom0)
+# path        /<xend-domain-lock-path>/<vm-uuid>
+# Return 0 on success, non-zero on error.
+#
+# lock-util [-s] path
+# -s          Lock status.  If lock is acquired, print any contents
+#             on stdout and return 0.  Return non-zero if lock is
+#             available.
+# path        /<xend-domain-lock-path>/<vm-uuid>
+# If lock is acquired, print any contents on stdout and return 0.
+# Return non-zero if lock is available.
+#
+# Default lock-util behavior:
+# On domain start event, domain-lock will create and flock(1)
+# /<xend-domain-lock-path>/<vm-uuid>/lock.  Every two seconds it
+# will write <vm-name>, <vm-uuid>, <vm-host>, and <count> to the lock.
+# <count> is a running counter.
+# On domain stop event, domain-lock will unlock and remove
+# /<xend-domain-lock-path>/<vm-uuid>/lock.
+#
+# Note: If xend-domain-lock-path is a cluster-unaware file system,
+#       administrator intervention may be required to remove stale
+#       locks.  Consider two hosts using NFS for xend-domain-lock-path
+#       when HostA, running vm1, crashes.  HostB could not acquire a
+#       lock for vm1 since the NFS server holds an exclusive lock
+#       acquired by HostA.  The lock file must be manually removed
+#       before starting vm1 on HostA.
+#
+#(xend-domain-lock-utility domain-lock)
+
 # If we have a very big scsi device configuration, start of xend is slow,
 # because xend scans all the device paths to build its internal PSCSI device
 # list.  If we need only a few devices for assigning to a guest, we can reduce
Index: xen-4.1.0-testing/tools/python/xen/xend/XendOptions.py
===================================================================
--- xen-4.1.0-testing.orig/tools/python/xen/xend/XendOptions.py
+++ xen-4.1.0-testing/tools/python/xen/xend/XendOptions.py
@@ -154,6 +154,17 @@ class XendOptions:
     use loose check automatically if necessary."""
     pci_dev_assign_strict_check_default = True
 
+    """Default for the flag indicating whether xend should create
+    a lock file for domains when they are started."""
+    xend_domain_lock = 'no'
+
+    """Default domain lock storage path."""
+    xend_domain_lock_path_default = '/var/lib/xen/images/vm_locks'
+
+    """Default script to acquire/release domain lock"""
+    xend_domain_lock_utility = auxbin.scripts_dir() + "/domain-lock"
+
+
     def __init__(self):
         self.configure()
 
@@ -401,6 +412,25 @@ class XendOptions:
         else:
             return None
 
+    def get_xend_domain_lock(self):
+        """Get the flag indicating whether xend should create a lock file
+        for domains when they are started."""
+        return self.get_config_bool("xend-domain-lock", self.xend_domain_lock)
+
+    def get_xend_domain_lock_path(self):
+        """ Get the path for domain lock storage
+        """
+        return self.get_config_string("xend-domain-lock-path", self.xend_domain_lock_path_default)
+
+    def get_xend_domain_lock_utility(self):
+        """Get the lock utility: configured name under scripts dir, or default."""
+        s = self.get_config_string('xend-domain-lock-utility')
+
+        if s:
+            return os.path.join(auxbin.scripts_dir(), s)
+        else:
+            return self.xend_domain_lock_utility
+
     def get_vnc_tls(self):
         return self.get_config_string('vnc-tls', self.xend_vnc_tls)
 
Index: xen-4.1.0-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.1.0-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.1.0-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -133,6 +133,8 @@ def save(fd, dominfo, network, live, dst
                 dominfo.shutdown('suspend')
                 dominfo.waitForSuspend()
             if line in ('suspend', 'suspended'):
+                if not checkpoint:
+                    dominfo.release_running_lock(domain_name)
                 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
                                        domain_name)
                 log.info("Domain %d suspended.", dominfo.getDomid())
@@ -410,6 +412,7 @@ def restore(xd, fd, dominfo = None, paus
         if not paused:
             dominfo.unpause()
 
+        dominfo.acquire_running_lock()
         return dominfo
     except Exception, exn:
         dominfo.destroy()
Index: xen-4.1.0-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.1.0-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.1.0-testing/tools/hotplug/Linux/Makefile
@@ -22,6 +22,7 @@ XEN_SCRIPTS += vtpm vtpm-delete
 XEN_SCRIPTS += xen-hotplug-cleanup
 XEN_SCRIPTS += external-device-migrate
 XEN_SCRIPTS += vscsi
+XEN_SCRIPTS += domain-lock vm-monitor
 XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
 XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh vif-common.sh
 XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
Index: xen-4.1.0-testing/tools/hotplug/Linux/domain-lock
===================================================================
--- /dev/null
+++ xen-4.1.0-testing/tools/hotplug/Linux/domain-lock
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+basedir=$(dirname "$0")
+
+usage() {
+    echo "usage: domain-lock [-l|-u] -n <vm name> -i <vm uuid> -p <physical host> path"
+    echo "usage: domain-lock [-s] path"
+    echo ""
+    echo "-l    lock"
+    echo "-u    unlock"
+    echo "-s    status (default)"
+    echo "-n    Virtual Machine name"
+    echo "-i    Virtual Machine Id or UUID"
+    echo "-p    Virtual Machine Server (physical host) name"
+    echo "path  A per-VM, unique location where external lock will be managed"
+    exit 1
+}
+
+# Stop the vm-monitor that refreshes the lock for VM $2 and delete $1/lock.
+remove_lock(){
+    local path=$1/lock
+    local name=$2
+
+    # Match the name as a whole argument so "vm1" does not also hit "vm10".
+    pid=`ps -efwww | grep vm-monitor | grep -F -- " $name " | awk '{print $2}'`
+    if [ -n "$pid" ]; then
+        kill $pid
+        rm -f "$path"
+    fi
+}
+
+# Print the lock contents and exit 0 if $1/lock is held, else exit non-zero.
+get_status(){
+    local path=$1/lock
+    [ -f "$path" ] || exit 1
+
+    # If we can grab the exclusive lock ourselves, nobody is holding it.
+    if flock -xn "$path" /bin/true; then
+        exit 1
+    fi
+    cat "$path"
+    exit 0
+}
+
+mode="status"
+
+while getopts ":lusn:i:p:" opt; do
+    case $opt in
+        l )
+            mode="lock"
+            ;;
+        u )
+            mode="unlock"
+            ;;
+        s )
+            mode="status"
+            ;;
+        p )
+            vm_host=$OPTARG
+            ;;
+        n )
+            vm_name=$OPTARG
+            ;;
+        i )
+            vm_uuid=$OPTARG
+            ;;
+        \? )
+            usage
+            ;;
+    esac
+done
+
+shift $(($OPTIND - 1))
+vm_path=$1
+
+case $mode in
+    lock )
+        [ -z "$vm_path" ] || [ -z "$vm_name" ] || [ -z "$vm_uuid" ] || [ -z "$vm_host" ] && usage
+        "$basedir/set-lock" "$vm_path" "$vm_name" "$vm_uuid" "$vm_host"
+        ;;
+    unlock )
+        [ -z "$vm_path" ] || [ -z "$vm_name" ] || [ -z "$vm_uuid" ] || [ -z "$vm_host" ] && usage
+        remove_lock "$vm_path" "$vm_name" "$vm_uuid" "$vm_host"
+        ;;
+    status )
+        [ -z "$vm_path" ] && usage
+        get_status "$vm_path"
+        ;;
+esac
Index: xen-4.1.0-testing/tools/hotplug/Linux/vm-monitor
===================================================================
--- /dev/null
+++ xen-4.1.0-testing/tools/hotplug/Linux/vm-monitor
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+basedir=$(dirname "$0")
+HA_TICK=2
+
+monitor() {
+    local path=$1
+    local name=$2
+    local uuid=$3
+    local host=$4
+    local count=0
+    path=$path/lock
+
+    while :
+    do
+        echo "name=$name uuid=$uuid host=$host count=$count" > "$path"
+        count=$(($count+1))
+        sleep $HA_TICK
+    done&
+}
+
+create_lock() {
+    local path=$1/lock
+    local rc=0
+
+    [ -f "$path" ] || touch "$path"
+    flock -x -w $HA_TICK "$path" "$basedir/vm-monitor" "$@"
+    rc=$?
+    if [ $rc -eq 1 ]; then
+        echo `cat "$path"`
+        exit 1
+    else
+        exit $rc
+    fi
+}
+
+if [ "$0" = "$basedir/set-lock" ]; then
+    create_lock "$@"
+elif [ "$0" = "$basedir/vm-monitor" ]; then
+    monitor "$@"
+fi
Index: xen-4.1.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -470,6 +470,7 @@ class XendDomainInfo:
 
         if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED, XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED):
             try:
+                self.acquire_running_lock()
                 XendTask.log_progress(0, 30, self._constructDomain)
                 XendTask.log_progress(31, 60, self._initDomain)
 
@@ -2984,6 +2985,11 @@ class XendDomainInfo:
 
             self._stateSet(DOM_STATE_HALTED)
             self.domid = None  # Do not push into _stateSet()!
+
+            try:
+                self.release_running_lock()
+            except:
+                log.exception("Failed to release domain lock.")
         finally:
             self.refresh_shutdown_lock.release()
 
@@ -4491,6 +4497,74 @@ class XendDomainInfo:
     def has_device(self, dev_class, dev_uuid):
         return (dev_uuid in self.info['%s_refs' % dev_class.lower()])
 
+    # Return name of host contained in lock file, or "unknown".
+    def get_lock_host(self, path):
+        fin = os.popen(xoptions.get_xend_domain_lock_utility() + \
+                       ' -s ' + path, 'r')
+        hostname = "unknown"
+
+        try:
+            # Only key=value tokens are considered; others are skipped.
+            for token in fin.readline().split():
+                item = token.split('=', 1)
+                if len(item) == 2 and item[0] == 'host':
+                    hostname = item[1]
+            return hostname
+        finally:
+            fin.close()
+
+    # Acquire a lock for the domain.  No-op if domain locking is turned off.
+    def acquire_running_lock(self):
+        """Take the external running-lock for this domain.
+
+        No-op when domain locking is off.  Raises XendError if the lock
+        directory cannot be created or the lock utility reports failure."""
+        if not xoptions.get_xend_domain_lock():
+            return
+
+        log.debug("Acquiring lock for domain %s" % self.info['name_label'])
+        path = os.path.join(xoptions.get_xend_domain_lock_path(),
+                            self.get_uuid())
+
+        try:
+            if not os.path.exists(path):
+                mkdir.parents(path, stat.S_IRWXU)
+        except Exception:
+            log.exception("%s could not be created." % path)
+            raise XendError("%s could not be created." % path)
+
+        status = os.system('%s -l -p %s -n %s -i %s %s' %
+                           (xoptions.get_xend_domain_lock_utility(),
+                            XendNode.instance().get_name(),
+                            self.info['name_label'], self.info['uuid'], path))
+        if status != 0:
+            log.debug("Failed to acquire lock: status = %d" % status)
+            raise XendError("The VM is locked and appears to be running on host %s." % self.get_lock_host(path))
+
+    # Release lock for domain.  No-op if domain locking is turned off.
+    def release_running_lock(self, name = None):
+        """Drop the external running-lock; 'name' overrides the domain name
+        passed to the lock utility.  A non-zero status is only logged."""
+        if not xoptions.get_xend_domain_lock():
+            return
+
+        dom_name = name or self.info['name_label']
+        log.debug("Releasing lock for domain %s" % dom_name)
+        path = os.path.join(xoptions.get_xend_domain_lock_path(),
+                            self.get_uuid())
+        status = os.system('%s -u -p %s -n %s -i %s %s' %
+                           (xoptions.get_xend_domain_lock_utility(),
+                            XendNode.instance().get_name(),
+                            dom_name, self.info['uuid'], path))
+        if status != 0:
+            log.error("Failed to release lock: status = %s" % status)
+        try:
+            # rmdir, unlike rmtree, refuses a directory that is not empty.
+            if not os.listdir(path):
+                os.rmdir(path)
+        except Exception:
+            log.exception("Failed to remove unmanaged directory %s." % path)
+
     def __str__(self):
         return '' % \
             (str(self.domid), self.info['name_label'],