390 lines
12 KiB
Diff
390 lines
12 KiB
Diff
|
# HG changeset patch
|
||
|
# User Jan Beulich <jbeulich@novell.com>
|
||
|
# Date 1311608606 -3600
|
||
|
# Node ID e8d1c8f074babcb0e4511393106e80a918a38204
|
||
|
# Parent e1717d180897e6e7a04d83a41d86b35ac16912b9
|
||
|
x86-64/MMCFG: pass down firmware (ACPI) reservation status of used memory space
|
||
|
|
||
|
Reserving the MMCFG address range(s) in E820 is specified to only be
|
||
|
optional for the firmware to do. The requirement is to have them
|
||
|
reserved in ACPI resources. Those, however, aren't directly visible to
|
||
|
Xen as they require the ACPI interpreter to be active. Thus, if a
|
||
|
range isn't reserved in E820, we should not completely disable use of
|
||
|
MMCFG on the respective bus range, but rather keep it disabled until
|
||
|
Dom0 can pass down information on the ACPI reservation status (through
|
||
|
a new physdevop hypercall).
|
||
|
|
||
|
Signed-off-by: Jan Beulich <jbeulich@novell.com>
|
||
|
|
||
|
--- a/xen/arch/x86/physdev.c
|
||
|
+++ b/xen/arch/x86/physdev.c
|
||
|
@@ -16,6 +16,10 @@
|
||
|
#include <xsm/xsm.h>
|
||
|
#include <asm/p2m.h>
|
||
|
|
||
|
+#ifdef CONFIG_X86_64
|
||
|
+#include "x86_64/mmconfig.h"
|
||
|
+#endif
|
||
|
+
|
||
|
#ifndef COMPAT
|
||
|
typedef long ret_t;
|
||
|
#endif
|
||
|
@@ -515,6 +519,24 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
+#ifdef __x86_64__
|
||
|
+ case PHYSDEVOP_pci_mmcfg_reserved: {
|
||
|
+ struct physdev_pci_mmcfg_reserved info;
|
||
|
+
|
||
|
+ ret = -EPERM;
|
||
|
+ if ( !IS_PRIV(current->domain) )
|
||
|
+ break;
|
||
|
+
|
||
|
+ ret = -EFAULT;
|
||
|
+ if ( copy_from_guest(&info, arg, 1) )
|
||
|
+ break;
|
||
|
+
|
||
|
+ ret = pci_mmcfg_reserved(info.address, info.segment,
|
||
|
+ info.start_bus, info.end_bus, info.flags);
|
||
|
+ break;
|
||
|
+ }
|
||
|
+#endif
|
||
|
+
|
||
|
case PHYSDEVOP_restore_msi: {
|
||
|
struct physdev_restore_msi restore_msi;
|
||
|
struct pci_dev *pdev;
|
||
|
--- a/xen/arch/x86/x86_64/mmconfig.h
|
||
|
+++ b/xen/arch/x86/x86_64/mmconfig.h
|
||
|
@@ -84,6 +84,11 @@ extern int pci_mmcfg_config_num;
|
||
|
extern struct acpi_mcfg_allocation *pci_mmcfg_config;
|
||
|
|
||
|
/* function prototypes */
|
||
|
+struct acpi_table_header;
|
||
|
int acpi_parse_mcfg(struct acpi_table_header *header);
|
||
|
+int pci_mmcfg_reserved(uint64_t address, unsigned int segment,
|
||
|
+ unsigned int start_bus, unsigned int end_bus,
|
||
|
+ unsigned int flags);
|
||
|
int pci_mmcfg_arch_init(void);
|
||
|
-void pci_mmcfg_arch_free(void);
|
||
|
+int pci_mmcfg_arch_enable(unsigned int);
|
||
|
+void pci_mmcfg_arch_disable(unsigned int);
|
||
|
--- a/xen/arch/x86/x86_64/mmconfig-shared.c
|
||
|
+++ b/xen/arch/x86/x86_64/mmconfig-shared.c
|
||
|
@@ -22,10 +22,10 @@
|
||
|
#include <asm/e820.h>
|
||
|
#include <asm/msr.h>
|
||
|
#include <asm/msr-index.h>
|
||
|
+#include <public/physdev.h>
|
||
|
|
||
|
#include "mmconfig.h"
|
||
|
|
||
|
-static int __initdata known_bridge;
|
||
|
unsigned int pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_MMCONF;
|
||
|
|
||
|
static void __init parse_mmcfg(char *s)
|
||
|
@@ -316,26 +316,21 @@ static int __init pci_mmcfg_check_hostbr
|
||
|
return name != NULL;
|
||
|
}
|
||
|
|
||
|
-typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
|
||
|
-
|
||
|
static int __init is_mmconf_reserved(
|
||
|
- check_reserved_t is_reserved,
|
||
|
u64 addr, u64 size, int i,
|
||
|
- typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
|
||
|
+ typeof(pci_mmcfg_config[0]) *cfg)
|
||
|
{
|
||
|
u64 old_size = size;
|
||
|
int valid = 0;
|
||
|
|
||
|
- while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) {
|
||
|
+ while (!e820_all_mapped(addr, addr + size - 1, E820_RESERVED)) {
|
||
|
size >>= 1;
|
||
|
if (size < (16UL<<20))
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (size >= (16UL<<20) || size == old_size) {
|
||
|
- printk(KERN_NOTICE
|
||
|
- "PCI: MCFG area at %lx reserved in %s\n",
|
||
|
- addr, with_e820?"E820":"ACPI motherboard resources");
|
||
|
+ printk(KERN_NOTICE "PCI: MCFG area at %lx reserved in E820\n", addr);
|
||
|
valid = 1;
|
||
|
|
||
|
if (old_size != size) {
|
||
|
@@ -352,15 +347,16 @@ static int __init is_mmconf_reserved(
|
||
|
return valid;
|
||
|
}
|
||
|
|
||
|
-static void __init pci_mmcfg_reject_broken(void)
|
||
|
+static bool_t __init pci_mmcfg_reject_broken(void)
|
||
|
{
|
||
|
typeof(pci_mmcfg_config[0]) *cfg;
|
||
|
int i;
|
||
|
+ bool_t valid = 1;
|
||
|
|
||
|
if ((pci_mmcfg_config_num == 0) ||
|
||
|
(pci_mmcfg_config == NULL) ||
|
||
|
(pci_mmcfg_config[0].address == 0))
|
||
|
- return;
|
||
|
+ return 0;
|
||
|
|
||
|
cfg = &pci_mmcfg_config[0];
|
||
|
|
||
|
@@ -374,27 +370,25 @@ static void __init pci_mmcfg_reject_brok
|
||
|
size = cfg->end_bus_number + 1 - cfg->start_bus_number;
|
||
|
size <<= 20;
|
||
|
printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
|
||
|
- "segment %hu buses %u - %u\n",
|
||
|
+ "segment %04x buses %02x - %02x\n",
|
||
|
i, (unsigned long)cfg->address, cfg->pci_segment,
|
||
|
(unsigned int)cfg->start_bus_number,
|
||
|
(unsigned int)cfg->end_bus_number);
|
||
|
|
||
|
- if (!is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1))
|
||
|
- goto reject;
|
||
|
+ if (!is_mmconf_reserved(addr, size, i, cfg) ||
|
||
|
+ pci_mmcfg_arch_enable(i)) {
|
||
|
+ pci_mmcfg_arch_disable(i);
|
||
|
+ valid = 0;
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
- return;
|
||
|
-
|
||
|
-reject:
|
||
|
- printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
|
||
|
- pci_mmcfg_arch_free();
|
||
|
- xfree(pci_mmcfg_config);
|
||
|
- pci_mmcfg_config = NULL;
|
||
|
- pci_mmcfg_config_num = 0;
|
||
|
+ return valid;
|
||
|
}
|
||
|
|
||
|
void __init acpi_mmcfg_init(void)
|
||
|
{
|
||
|
+ bool_t valid = 1;
|
||
|
+
|
||
|
/* MMCONFIG disabled */
|
||
|
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
|
||
|
return;
|
||
|
@@ -403,16 +397,17 @@ void __init acpi_mmcfg_init(void)
|
||
|
if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF))
|
||
|
return;
|
||
|
|
||
|
- /* for late to exit */
|
||
|
- if (known_bridge)
|
||
|
- return;
|
||
|
-
|
||
|
- if (pci_mmcfg_check_hostbridge())
|
||
|
- known_bridge = 1;
|
||
|
+ if (pci_mmcfg_check_hostbridge()) {
|
||
|
+ unsigned int i;
|
||
|
|
||
|
- if (!known_bridge) {
|
||
|
+ pci_mmcfg_arch_init();
|
||
|
+ for (i = 0; i < pci_mmcfg_config_num; ++i)
|
||
|
+ if (pci_mmcfg_arch_enable(i))
|
||
|
+ valid = 0;
|
||
|
+ } else {
|
||
|
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
|
||
|
- pci_mmcfg_reject_broken();
|
||
|
+ pci_mmcfg_arch_init();
|
||
|
+ valid = pci_mmcfg_reject_broken();
|
||
|
}
|
||
|
|
||
|
if ((pci_mmcfg_config_num == 0) ||
|
||
|
@@ -420,9 +415,41 @@ void __init acpi_mmcfg_init(void)
|
||
|
(pci_mmcfg_config[0].address == 0))
|
||
|
return;
|
||
|
|
||
|
- if (pci_mmcfg_arch_init()) {
|
||
|
+ if (valid)
|
||
|
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
|
||
|
+}
|
||
|
+
|
||
|
+int pci_mmcfg_reserved(uint64_t address, unsigned int segment,
|
||
|
+ unsigned int start_bus, unsigned int end_bus,
|
||
|
+ unsigned int flags)
|
||
|
+{
|
||
|
+ unsigned int i;
|
||
|
+
|
||
|
+ if (flags & ~XEN_PCI_MMCFG_RESERVED)
|
||
|
+ return -EINVAL;
|
||
|
+
|
||
|
+ for (i = 0; i < pci_mmcfg_config_num; ++i) {
|
||
|
+ const typeof(pci_mmcfg_config[0]) *cfg = &pci_mmcfg_config[i];
|
||
|
+
|
||
|
+ if (cfg->pci_segment == segment &&
|
||
|
+ cfg->start_bus_number == start_bus &&
|
||
|
+ cfg->end_bus_number == end_bus) {
|
||
|
+ if (cfg->address != address) {
|
||
|
+ printk(KERN_WARNING
|
||
|
+ "Base address presented for segment %04x bus %02x-%02x"
|
||
|
+ " (%08" PRIx64 ") does not match previously obtained"
|
||
|
+ " one (%08" PRIx64 ")\n",
|
||
|
+ segment, start_bus, end_bus, address, cfg->address);
|
||
|
+ return -EIO;
|
||
|
+ }
|
||
|
+ if (flags & XEN_PCI_MMCFG_RESERVED)
|
||
|
+ return pci_mmcfg_arch_enable(i);
|
||
|
+ pci_mmcfg_arch_disable(i);
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
}
|
||
|
+
|
||
|
+ return -ENODEV;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
--- a/xen/arch/x86/x86_64/mmconfig_64.c
|
||
|
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
|
||
|
@@ -112,7 +112,8 @@ int pci_mmcfg_write(unsigned int seg, un
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
-static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
|
||
|
+static void __iomem *mcfg_ioremap(const struct acpi_mcfg_allocation *cfg,
|
||
|
+ unsigned int prot)
|
||
|
{
|
||
|
unsigned long virt, size;
|
||
|
|
||
|
@@ -126,19 +127,55 @@ static void __iomem * __init mcfg_iorema
|
||
|
if (map_pages_to_xen(virt,
|
||
|
(cfg->address >> PAGE_SHIFT) +
|
||
|
(cfg->start_bus_number << (20 - PAGE_SHIFT)),
|
||
|
- size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE))
|
||
|
+ size >> PAGE_SHIFT, prot))
|
||
|
return NULL;
|
||
|
|
||
|
return (void __iomem *) virt;
|
||
|
}
|
||
|
|
||
|
+int pci_mmcfg_arch_enable(unsigned int idx)
|
||
|
+{
|
||
|
+ const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
|
||
|
+
|
||
|
+ if (pci_mmcfg_virt[idx].virt)
|
||
|
+ return 0;
|
||
|
+ pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, PAGE_HYPERVISOR_NOCACHE);
|
||
|
+ if (!pci_mmcfg_virt[idx].virt) {
|
||
|
+ printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n",
|
||
|
+ cfg->pci_segment);
|
||
|
+ return -ENOMEM;
|
||
|
+ }
|
||
|
+ printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
|
||
|
+ cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+void pci_mmcfg_arch_disable(unsigned int idx)
|
||
|
+{
|
||
|
+ const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
|
||
|
+
|
||
|
+ pci_mmcfg_virt[idx].virt = NULL;
|
||
|
+ /*
|
||
|
+ * Don't use destroy_xen_mappings() here, or make sure that at least
|
||
|
+ * the necessary L4 entries get populated (so that they get properly
|
||
|
+ * propagated to guest domains' page tables).
|
||
|
+ */
|
||
|
+ mcfg_ioremap(cfg, 0);
|
||
|
+ printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n",
|
||
|
+ cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
|
||
|
+}
|
||
|
+
|
||
|
int __init pci_mmcfg_arch_init(void)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
+ if (pci_mmcfg_virt)
|
||
|
+ return 0;
|
||
|
+
|
||
|
pci_mmcfg_virt = xmalloc_array(struct mmcfg_virt, pci_mmcfg_config_num);
|
||
|
if (pci_mmcfg_virt == NULL) {
|
||
|
printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n");
|
||
|
+ pci_mmcfg_config_num = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
memset(pci_mmcfg_virt, 0, sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num);
|
||
|
@@ -149,34 +186,5 @@ int __init pci_mmcfg_arch_init(void)
|
||
|
++mmcfg_pci_segment_shift;
|
||
|
}
|
||
|
mmcfg_pci_segment_shift += 20;
|
||
|
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
|
||
|
- pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]);
|
||
|
- if (!pci_mmcfg_virt[i].virt) {
|
||
|
- printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
|
||
|
- "segment %d\n",
|
||
|
- pci_mmcfg_config[i].pci_segment);
|
||
|
- pci_mmcfg_arch_free();
|
||
|
- return 0;
|
||
|
- }
|
||
|
- }
|
||
|
return 1;
|
||
|
}
|
||
|
-
|
||
|
-void __init pci_mmcfg_arch_free(void)
|
||
|
-{
|
||
|
- int i;
|
||
|
-
|
||
|
- if (pci_mmcfg_virt == NULL)
|
||
|
- return;
|
||
|
-
|
||
|
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
|
||
|
- if (pci_mmcfg_virt[i].virt) {
|
||
|
- iounmap(pci_mmcfg_virt[i].virt);
|
||
|
- pci_mmcfg_virt[i].virt = NULL;
|
||
|
- pci_mmcfg_virt[i].cfg = NULL;
|
||
|
- }
|
||
|
- }
|
||
|
-
|
||
|
- xfree(pci_mmcfg_virt);
|
||
|
- pci_mmcfg_virt = NULL;
|
||
|
-}
|
||
|
--- a/xen/arch/x86/x86_64/physdev.c
|
||
|
+++ b/xen/arch/x86/x86_64/physdev.c
|
||
|
@@ -54,6 +54,10 @@
|
||
|
#define physdev_get_free_pirq compat_physdev_get_free_pirq
|
||
|
#define physdev_get_free_pirq_t physdev_get_free_pirq_compat_t
|
||
|
|
||
|
+#define xen_physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved
|
||
|
+CHECK_physdev_pci_mmcfg_reserved;
|
||
|
+#undef xen_physdev_pci_mmcfg_reserved
|
||
|
+
|
||
|
#define COMPAT
|
||
|
#undef guest_handle_okay
|
||
|
#define guest_handle_okay compat_handle_okay
|
||
|
--- a/xen/include/public/physdev.h
|
||
|
+++ b/xen/include/public/physdev.h
|
||
|
@@ -255,6 +255,19 @@ struct physdev_get_free_pirq {
|
||
|
typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
|
||
|
DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
|
||
|
|
||
|
+#define XEN_PCI_MMCFG_RESERVED 0x1
|
||
|
+
|
||
|
+#define PHYSDEVOP_pci_mmcfg_reserved 24
|
||
|
+struct physdev_pci_mmcfg_reserved {
|
||
|
+ uint64_t address;
|
||
|
+ uint16_t segment;
|
||
|
+ uint8_t start_bus;
|
||
|
+ uint8_t end_bus;
|
||
|
+ uint32_t flags;
|
||
|
+};
|
||
|
+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
|
||
|
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
|
||
|
+
|
||
|
/*
|
||
|
* Notify that some PIRQ-bound event channels have been unmasked.
|
||
|
* ** This command is obsolete since interface version 0x00030202 and is **
|
||
|
--- a/xen/include/xlat.lst
|
||
|
+++ b/xen/include/xlat.lst
|
||
|
@@ -60,6 +60,7 @@
|
||
|
! memory_map memory.h
|
||
|
! memory_reservation memory.h
|
||
|
! pod_target memory.h
|
||
|
+? physdev_pci_mmcfg_reserved physdev.h
|
||
|
! sched_poll sched.h
|
||
|
? sched_remote_shutdown sched.h
|
||
|
? sched_shutdown sched.h
|