pci passthrough: handle managed pci devices Handle managed pci devices for libvirt usage. If a pci device is configured with "managed=1", it will be made assignable (unbound from its original driver and bound to the pcistub driver) before the vm starts, and reattached to its original driver after the vm shuts off. FATE#313570 Note: This patch was rejected upstream since xend is deprecated. See the following thread for details: http://lists.xen.org/archives/html/xen-devel/2013-01/msg01145.html Signed-off-by: Chunyan Liu Index: xen-4.4.0-testing/tools/python/xen/util/pci.py =================================================================== --- xen-4.4.0-testing.orig/tools/python/xen/util/pci.py +++ xen-4.4.0-testing/tools/python/xen/util/pci.py @@ -20,6 +20,8 @@ from xen.xend import sxp from xen.xend.XendConstants import AUTO_PHP_SLOT from xen.xend.XendSXPDev import dev_dict_to_sxp from xen.xend.XendLogging import log +from xen.xend.xenstore.xstransact import xstransact +from xen.xend.XendError import XendError # for 2.3 compatibility try: @@ -27,9 +29,11 @@ try: except NameError: from sets import Set as set +XS_PCIBACK_PATH = '/xm/pciback' PROC_PCI_PATH = '/proc/bus/pci/devices' PROC_PCI_NUM_RESOURCES = 7 +SYSFS_PCI_DRVS_PATH = 'bus/pci/drivers' SYSFS_PCI_DEVS_PATH = '/bus/pci/devices' SYSFS_PCI_DEV_RESOURCE_PATH = '/resource' SYSFS_PCI_DEV_CONFIG_PATH = '/config' @@ -161,7 +165,7 @@ def PCI_BDF(domain, bus, slot, func): def check_pci_opts(opts): def f((k, v)): - if k not in ['msitranslate', 'power_mgmt'] or \ + if k not in ['msitranslate', 'power_mgmt', 'managed'] or \ not v.lower() in ['0', '1', 'yes', 'no']: raise PciDeviceParseError('Invalid pci option %s=%s: ' % (k, v)) @@ -427,6 +431,9 @@ def __pci_dict_to_fmt_str(fmt, dev): def pci_dict_to_bdf_str(dev): return __pci_dict_to_fmt_str('%04x:%02x:%02x.%01x', dev) +def pci_dict_to_xs_bdf_str(dev): + return __pci_dict_to_fmt_str('%04x-%02x-%02x-%01x', dev) + def pci_dict_to_xc_str(dev): return __pci_dict_to_fmt_str('0x%x, 0x%x, 0x%x, 0x%x', dev) @@ 
-561,6 +568,115 @@ def find_all_assignable_devices(): dev_list = dev_list + [dev] return dev_list +def pci_assignable_add(dev): + '''detach pci device from driver that we need to unbind from and rebind + to pciback driver, then it can be assigned to guest. + ''' + sysfs_mnt = find_sysfs_mnt() + pcidev_path = sysfs_mnt + SYSFS_PCI_DEVS_PATH + pciback_path = sysfs_mnt + SYSFS_PCIBACK_PATH + + # See if the device exists + pci_bdf = pci_dict_to_bdf_str(dev) + path = pcidev_path + '/' + pci_bdf + if not os.path.exists(path): + log.debug("Pci device %s doesn't exist" % pci_bdf) + return -1 + + # Check to see if it's already assigned to pciback + path = pciback_path + '/' + pci_bdf + if os.path.exists(path): + log.debug("Pci device %s is already assigned to pciback" % pci_bdf) + return 0 + + # Check to see if there's already a driver that we need to unbind from + path = pcidev_path + '/' + pci_bdf + '/driver' + drv_path = None + if os.path.exists(path): + drv_path = os.path.realpath(path).replace(" ", "\ ") + cmd = 'echo %s > %s/unbind' % (pci_bdf, drv_path) + if os.system(cmd): + log.debug("Couldn't unbind device") + return -1; + + # Store driver_path for rebinding to dom0 + if drv_path is not None: + xs_pci_bdf = pci_dict_to_xs_bdf_str(dev) + path = XS_PCIBACK_PATH + '/' + xs_pci_bdf + xstransact.Mkdir(path) + xstransact.Write(path, 'driver_path', drv_path) + else: + log.debug("Not bound to a driver, will not be rebound") + + # Bind to pciback + try: + # Scan through /sys/.../pciback/slots looking for pcidev's BDF + slots = os.popen('cat %s/slots' % pciback_path).read() + if re.search(pci_bdf, slots) is None: + # write bdf to new_slot + cmd = 'echo %s > %s/new_slot' % (pci_bdf, pciback_path) + if os.system(cmd): + raise XendError("Couldn't add device to pciback new_slot") + + # Bind to pciback + cmd = 'echo %s > %s/bind' % (pci_bdf, pciback_path) + if os.system(cmd): + raise XendError("Couldn't bind device to pciback") + except XendError: + # rebind to original driver + 
if drv_path is not None: + log.debug("Rebind to original driver") + cmd = 'echo %s > %s/bind' % (pci_bdf, drv_path) + if os.system(cmd): + log.debug("Failed to rebind") + return -1 + + return 0 + +def pci_assignable_remove(dev): + '''unbind pci device from pciback, and rebind to host pci driver where it + was detached from in pci-assignable-add. + ''' + sysfs_mnt = find_sysfs_mnt() + pcidrv_path = sysfs_mnt + SYSFS_PCI_DRVS_PATH + pciback_path = sysfs_mnt + SYSFS_PCIBACK_PATH + pci_bdf = pci_dict_to_bdf_str(dev) + + # Unbind from pciback + path = pciback_path + '/' + pci_bdf + if os.path.exists(path): + # unbind + cmd = 'echo %s > %s/unbind' % (pci_bdf, pciback_path) + if os.system(cmd): + log.debug("Couldn't unbind device to pciback") + return -1 + + # remove slots if necessary + slots = os.popen('cat %s/slots' % pciback_path).read() + if re.search(pci_bdf, slots): + # write bdf to remove_slot + cmd = 'echo %s > %s/remove_slot' % (pci_bdf, pciback_path) + if os.system(cmd): + log.debug("Couldn't remove pciback slot") + return -1 + else: + log.debug("Not bound to pciback") + + # Rebind if necessary + xs_pci_bdf = pci_dict_to_xs_bdf_str(dev) + path = XS_PCIBACK_PATH + '/' + xs_pci_bdf + drv_path = xstransact.Read(path, 'driver_path') + if drv_path: + cmd = 'echo %s > %s/bind' % (pci_bdf, drv_path) + if os.system(cmd): + log.debug("Couldn't rebind to driver %s" % drv_path) + return -1 + xstransact.Remove(path) + else: + log.debug("Counldn't find path for original driver. Not rebinding") + + return 0 + def transform_list(target, src): ''' src: its element is pci string (Format: xxxx:xx:xx.x). target: its element is pci string, or a list of pci string. 
Index: xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.4.0-testing/tools/python/xen/xend/XendDomainInfo.py @@ -305,7 +305,8 @@ def dom_get(dom): return None from xen.xend.server.pciif import parse_pci_name, PciDevice,\ - get_assigned_pci_devices, get_all_assigned_pci_devices + get_assigned_pci_devices, get_all_assigned_pci_devices,\ + prepare_host_pci_devices, reattach_host_pci_devices def do_FLR(domid, is_hvm): @@ -319,6 +320,20 @@ def do_FLR(domid, is_hvm): "parse it's resources - "+str(e)) dev.do_FLR(is_hvm, xoptions.get_pci_dev_assign_strict_check()) +def prepare_domain_pci_devices(domconfig): + ordered_refs = domconfig.ordered_device_refs() + for dev_uuid in ordered_refs: + devclass, devconfig = domconfig['devices'][dev_uuid] + if devclass == 'pci': + prepare_host_pci_devices(devconfig) + +def reattach_domain_pci_devices(domconfig): + ordered_refs = domconfig.ordered_device_refs() + for dev_uuid in ordered_refs: + devclass, devconfig = domconfig['devices'][dev_uuid] + if devclass == 'pci': + reattach_host_pci_devices(devconfig) + class XendDomainInfo: """An object represents a domain. @@ -472,6 +487,7 @@ class XendDomainInfo: if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED, XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED): try: + prepare_domain_pci_devices(self.info); XendTask.log_progress(0, 30, self._constructDomain) XendTask.log_progress(31, 60, self._initDomain) @@ -498,6 +514,7 @@ class XendDomainInfo: state = self._stateGet() if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED): try: + prepare_domain_pci_devices(self.info) self._constructDomain() try: @@ -714,6 +731,8 @@ class XendDomainInfo: the device. 
""" + if self.domid is None: + return self.iommu_check_pod_mode() # Test whether the devices can be assigned @@ -853,6 +872,9 @@ class XendDomainInfo: if self.domid is not None: try: + if dev_type == 'pci': + prepare_host_pci_devices(dev_config_dict) + dev_config_dict['devid'] = devid = \ self._createDevice(dev_type, dev_config_dict) if dev_type == 'tap2': @@ -866,6 +888,7 @@ class XendDomainInfo: if dev_type == 'pci': for dev in dev_config_dict['devs']: XendAPIStore.deregister(dev['uuid'], 'DPCI') + reattach_host_pci_devices(dev_config_dict) elif dev_type == 'vscsi': for dev in dev_config_dict['devs']: XendAPIStore.deregister(dev['uuid'], 'DSCSI') @@ -910,6 +933,10 @@ class XendDomainInfo: dev_config = pci_convert_sxp_to_dict(dev_sxp) dev = dev_config['devs'][0] + # For attach only. For boot, prepare work has been done already in earlier stage. + if self.domid is not None and pci_state == 'Initialising' and pci_sub_state != 'Booting': + prepare_host_pci_devices(dev_config) + stubdomid = self.getStubdomDomid() # Do HVM specific processing if self.info.is_hvm(): @@ -986,6 +1013,9 @@ class XendDomainInfo: new_dev_sxp = dev_control.configuration(devid) self.info.device_update(dev_uuid, new_dev_sxp) + if pci_state == 'Closing': + reattach_host_pci_devices(dev_config) + # If there is no device left, destroy pci and remove config. 
if num_devs == 0: if self.info.is_hvm(): @@ -3175,6 +3205,7 @@ class XendDomainInfo: log.debug("%s KiB need to add to Memory pool" %self.alloc_mem) MemoryPool.instance().increase_memory(self.alloc_mem) + reattach_domain_pci_devices(self.info) self._cleanup_phantom_devs(paths) self._cleanupVm() Index: xen-4.4.0-testing/tools/python/xen/xend/server/pciif.py =================================================================== --- xen-4.4.0-testing.orig/tools/python/xen/xend/server/pciif.py +++ xen-4.4.0-testing/tools/python/xen/xend/server/pciif.py @@ -86,6 +86,48 @@ def get_all_assigned_pci_devices(domid = pci_str_list = pci_str_list + get_assigned_pci_devices(int(d)) return pci_str_list +def reattach_host_pci_devices(devconfig): + pci_dev_list = devconfig.get('devs', []) + for pci_dev in pci_dev_list: + managed = 0 + pci_opts_config = pci_dev.get('opts', []) + for opt in pci_opts_config: + if opt[0] == 'managed': + managed = opt[1] + if managed: + if pci_assignable_remove(pci_dev) != 0: + raise VmError('pci_assignable_remove failed') + +def detach_host_pci_devices(devconfig): + pci_dev_list = devconfig.get('devs', []) + reattach = 0 + for pci_dev in pci_dev_list: + managed = 0 + pci_opts_config = pci_dev.get('opts', []) + for opt in pci_opts_config: + if opt[0] == 'managed': + managed = opt[1] + if managed: + if pci_assignable_add(pci_dev) != 0: + log.debug('pci_assignable_add failed') + reattach = 1 + break + + if reattach: + reattach_host_pci_devices(devconfig) + raise VmError('detach_host_pci_devices failed') + +def prepare_host_pci_devices(devconfig): + # Test whether the device used by other domain + pci_dev_list = devconfig.get('devs', []) + for pci_dev in pci_dev_list: + pci_name = pci_dict_to_bdf_str(pci_dev) + if pci_name in get_all_assigned_pci_devices(): + raise VmError("failed to assign device %s that has" + " already been assigned to other domain." 
% pci_name) + # Detach 'managed' devices + detach_host_pci_devices(devconfig) + class PciController(DevController): def __init__(self, vm):