SHA256
1
0
forked from pool/qemu

Accepting request 762501 from home:bfrogers:branches:Virtualization

- Add Cooperlake vcpu model (jira-SLE-10195)
  i386-Add-MSR-feature-bit-for-MDS-NO.patch
  i386-Add-macro-for-stibp.patch
  i386-Add-new-CPU-model-Cooperlake.patch
  target-i386-Add-new-bit-definitions-of-M.patch
  target-i386-Add-missed-features-to-Coope.patch
- Add HMAT support (jira-SLE-10228) (the test case for this series
  isn't included because we aren't set up to handle binary patches)
  numa-Extend-CLI-to-provide-initiator-inf.patch
  numa-Extend-CLI-to-provide-memory-latenc.patch
  numa-Extend-CLI-to-provide-memory-side-c.patch
  hmat-acpi-Build-Memory-Proximity-Domain-.patch
  hmat-acpi-Build-System-Locality-Latency-.patch
  hmat-acpi-Build-Memory-Side-Cache-Inform.patch
  tests-numa-Add-case-for-QMP-build-HMAT.patch

OBS-URL: https://build.opensuse.org/request/show/762501
OBS-URL: https://build.opensuse.org/package/show/Virtualization/qemu?expand=0&rev=520
This commit is contained in:
Bruce Rogers 2020-01-09 17:59:25 +00:00 committed by Git OBS Bridge
parent 076d13f46d
commit 4e3aa638c5
17 changed files with 2318 additions and 47 deletions

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6a42da9e52b11e10e52dffebda8d0b7fd1c6b813e83c1758ac50020b18edc41d
size 29328
oid sha256:d7a9c78df502f34c7e09f81581ee0d59a896b8282135778d22af1cd27807553f
size 49832

View File

@ -0,0 +1,258 @@
From: Liu Jingqi <jingqi.liu@intel.com>
Date: Fri, 13 Dec 2019 09:19:25 +0800
Subject: hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
Git commit: e6f123c3b81241be33f1b763d0ff8b36d1ae9c1e
References: JIRA-SLE-10228
HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
(HMAT). The specification references below link:
http://www.uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
It describes the memory attributes, such as memory side cache
attributes and bandwidth and latency details, related to the
Memory Proximity Domain. The software is
expected to use this information as hint for optimization.
This structure describes Memory Proximity Domain Attributes by memory
subsystem and its associativity with processor proximity domain as well as
hint for memory usage.
In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Daniel Black <daniel@linux.ibm.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-5-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
hw/acpi/Kconfig | 7 ++-
hw/acpi/Makefile.objs | 1 +
hw/acpi/hmat.c | 99 +++++++++++++++++++++++++++++++++++++++++++
hw/acpi/hmat.h | 42 ++++++++++++++++++
hw/i386/acpi-build.c | 5 +++
5 files changed, 152 insertions(+), 2 deletions(-)
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 12e3f1e86e62256bf274b554938b..54209c6f2f17d4ca0a737cb25403 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
select ACPI_NVDIMM
select ACPI_CPU_HOTPLUG
select ACPI_MEMORY_HOTPLUG
+ select ACPI_HMAT
config ACPI_X86_ICH
bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
bool
depends on ACPI
+config ACPI_HMAT
+ bool
+ depends on ACPI
+
config ACPI_PCI
bool
depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
depends on PC
config ACPI_HW_REDUCED
- bool
- depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 655a9c197341fed6fcea2062a30c..517bd88704769d8605dde18a6776 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-y += acpi_interface.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 0000000000000000000000000000000000000000..9ff79308a497fe40a1b0a2f9a043ad3bebb2c3cb
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,99 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ * Tao Xu <tao3.xu@intel.com>
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+
+/*
+ * ACPI 6.3:
+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
+ */
+static void build_hmat_mpda(GArray *table_data, uint16_t flags,
+ uint32_t initiator, uint32_t mem_node)
+{
+
+ /* Memory Proximity Domain Attributes Structure */
+ /* Type */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, 40, 4);
+ /* Flags */
+ build_append_int_noprefix(table_data, flags, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Proximity Domain for the Attached Initiator */
+ build_append_int_noprefix(table_data, initiator, 4);
+ /* Proximity Domain for the Memory */
+ build_append_int_noprefix(table_data, mem_node, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+ /*
+ * Reserved:
+ * Previously defined as the Start Address of the System Physical
+ * Address Range. Deprecated since ACPI Spec 6.3.
+ */
+ build_append_int_noprefix(table_data, 0, 8);
+ /*
+ * Reserved:
+ * Previously defined as the Range Length of the region in bytes.
+ * Deprecated since ACPI Spec 6.3.
+ */
+ build_append_int_noprefix(table_data, 0, 8);
+}
+
+/* Build HMAT sub table structures */
+static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
+{
+ uint16_t flags;
+ int i;
+
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ flags = 0;
+
+ if (numa_state->nodes[i].initiator < MAX_NODES) {
+ flags |= HMAT_PROXIMITY_INITIATOR_VALID;
+ }
+
+ build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
+ }
+}
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
+{
+ int hmat_start = table_data->len;
+
+ /* reserve space for HMAT header */
+ acpi_data_push(table_data, 40);
+
+ hmat_build_table_structs(table_data, numa_state);
+
+ build_header(linker, table_data,
+ (void *)(table_data->data + hmat_start),
+ "HMAT", table_data->len - hmat_start, 2, NULL, NULL);
+}
diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
new file mode 100644
index 0000000000000000000000000000000000000000..437dbc6872e82e4c1ae42a9ff16299465eec052f
--- /dev/null
+++ b/hw/acpi/hmat.h
@@ -0,0 +1,42 @@
+/*
+ * HMAT ACPI Implementation Header
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ * Tao Xu <tao3.xu@intel.com>
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef HMAT_H
+#define HMAT_H
+
+#include "hw/acpi/aml-build.h"
+
+/*
+ * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure,
+ * Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in
+ * the Proximity Domain for the Attached Initiator field is valid.
+ * Other bits reserved.
+ */
+#define HMAT_PROXIMITY_INITIATOR_VALID 0x1
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state);
+
+#endif
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 12ff55fcfb543208c18ba44d569e..90a9c2ce6f8c01221efc56f63f79 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -67,6 +67,7 @@
#include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
+#include "hw/acpi/hmat.h"
/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
* -M pc-i440fx-2.0. Even if the actual amount of AML generated grows
@@ -2834,6 +2835,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_slit(tables_blob, tables->linker, machine);
}
+ if (machine->numa_state->hmat_enabled) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_hmat(tables_blob, tables->linker, machine->numa_state);
+ }
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);

View File

@ -0,0 +1,122 @@
From: Liu Jingqi <jingqi.liu@intel.com>
Date: Fri, 13 Dec 2019 09:19:27 +0800
Subject: hmat acpi: Build Memory Side Cache Information Structure(s)
Git commit: a9c2b841af002db6e21e1297c9026b63fc22c875
References: JIRA-SLE-10228
This structure describes memory side cache information for memory
proximity domains if the memory side cache is present and the
physical device forms the memory side cache.
The software could use this information to effectively place
the data in memory to maximize the performance of the system
memory that use the memory side cache.
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Daniel Black <daniel@linux.ibm.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-7-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
hw/acpi/hmat.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 68 insertions(+), 1 deletion(-)
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 4635d45deeccd34659f6c8325d66..7c24bb53719e497d5cc6cf3f262e 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -143,14 +143,62 @@ static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
g_free(entry_list);
}
+/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */
+static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
+ NumaHmatCacheOptions *hmat_cache)
+{
+ /*
+ * Cache Attributes: Bits [3:0] Total Cache Levels
+ * for this Memory Proximity Domain
+ */
+ uint32_t cache_attr = total_levels;
+
+ /* Bits [7:4] : Cache Level described in this structure */
+ cache_attr |= (uint32_t) hmat_cache->level << 4;
+
+ /* Bits [11:8] - Cache Associativity */
+ cache_attr |= (uint32_t) hmat_cache->associativity << 8;
+
+ /* Bits [15:12] - Write Policy */
+ cache_attr |= (uint32_t) hmat_cache->policy << 12;
+
+ /* Bits [31:16] - Cache Line size in bytes */
+ cache_attr |= (uint32_t) hmat_cache->line << 16;
+
+ /* Type */
+ build_append_int_noprefix(table_data, 2, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, 32, 4);
+ /* Proximity Domain for the Memory */
+ build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+ /* Memory Side Cache Size */
+ build_append_int_noprefix(table_data, hmat_cache->size, 8);
+ /* Cache Attributes */
+ build_append_int_noprefix(table_data, cache_attr, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /*
+ * Number of SMBIOS handles (n)
+ * Linux kernel uses Memory Side Cache Information Structure
+ * without SMBIOS entries for now, so set Number of SMBIOS handles
+ * as 0.
+ */
+ build_append_int_noprefix(table_data, 0, 2);
+}
+
/* Build HMAT sub table structures */
static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
{
uint16_t flags;
uint32_t num_initiator = 0;
uint32_t initiator_list[MAX_NODES];
- int i, hierarchy, type;
+ int i, hierarchy, type, cache_level, total_levels;
HMAT_LB_Info *hmat_lb;
+ NumaHmatCacheOptions *hmat_cache;
for (i = 0; i < numa_state->num_nodes; i++) {
flags = 0;
@@ -184,6 +232,25 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
}
}
}
+
+ /*
+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
+ * Table 5-147
+ */
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ total_levels = 0;
+ for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) {
+ if (numa_state->hmat_cache[i][cache_level]) {
+ total_levels++;
+ }
+ }
+ for (cache_level = 0; cache_level <= total_levels; cache_level++) {
+ hmat_cache = numa_state->hmat_cache[i][cache_level];
+ if (hmat_cache) {
+ build_hmat_cache(table_data, total_levels, hmat_cache);
+ }
+ }
+ }
}
void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)

View File

@ -0,0 +1,159 @@
From: Liu Jingqi <jingqi.liu@intel.com>
Date: Fri, 13 Dec 2019 09:19:26 +0800
Subject: hmat acpi: Build System Locality Latency and Bandwidth Information
Structure(s)
Git commit: 4586a2cb833f80b19c80ebe364a005ac2fa0974a
References: JIRA-SLE-10228
This structure describes the memory access latency and bandwidth
information from various memory access initiator proximity domains.
The latency and bandwidth numbers represented in this structure
correspond to rated latency and bandwidth for the platform.
The software could use this information as hint for optimization.
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-6-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
hw/acpi/hmat.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 103 insertions(+), 1 deletion(-)
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 9ff79308a497fe40a1b0a2f9a043..4635d45deeccd34659f6c8325d66 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -25,6 +25,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "sysemu/numa.h"
#include "hw/acpi/hmat.h"
@@ -67,11 +68,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags,
build_append_int_noprefix(table_data, 0, 8);
}
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+ uint32_t num_initiator, uint32_t num_target,
+ uint32_t *initiator_list)
+{
+ int i, index;
+ HMAT_LB_Data *lb_data;
+ uint16_t *entry_list;
+ uint32_t base;
+ /* Length in bytes for entire structure */
+ uint32_t lb_length
+ = 32 /* Table length upto and including Entry Base Unit */
+ + 4 * num_initiator /* Initiator Proximity Domain List */
+ + 4 * num_target /* Target Proximity Domain List */
+ + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+ /* Type */
+ build_append_int_noprefix(table_data, 1, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, lb_length, 4);
+ /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+ assert(!(hmat_lb->hierarchy >> 4));
+ build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+ /* Data Type */
+ build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Number of Initiator Proximity Domains (s) */
+ build_append_int_noprefix(table_data, num_initiator, 4);
+ /* Number of Target Proximity Domains (t) */
+ build_append_int_noprefix(table_data, num_target, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+
+ /* Entry Base Unit */
+ if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+ /* Convert latency base from nanoseconds to picosecond */
+ base = hmat_lb->base * 1000;
+ } else {
+ /* Convert bandwidth base from Byte to Megabyte */
+ base = hmat_lb->base / MiB;
+ }
+ build_append_int_noprefix(table_data, base, 8);
+
+ /* Initiator Proximity Domain List */
+ for (i = 0; i < num_initiator; i++) {
+ build_append_int_noprefix(table_data, initiator_list[i], 4);
+ }
+
+ /* Target Proximity Domain List */
+ for (i = 0; i < num_target; i++) {
+ build_append_int_noprefix(table_data, i, 4);
+ }
+
+ /* Latency or Bandwidth Entries */
+ entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t));
+ for (i = 0; i < hmat_lb->list->len; i++) {
+ lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+ index = lb_data->initiator * num_target + lb_data->target;
+
+ entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
+ }
+
+ for (i = 0; i < num_initiator * num_target; i++) {
+ build_append_int_noprefix(table_data, entry_list[i], 2);
+ }
+
+ g_free(entry_list);
+}
+
/* Build HMAT sub table structures */
static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
{
uint16_t flags;
- int i;
+ uint32_t num_initiator = 0;
+ uint32_t initiator_list[MAX_NODES];
+ int i, hierarchy, type;
+ HMAT_LB_Info *hmat_lb;
for (i = 0; i < numa_state->num_nodes; i++) {
flags = 0;
@@ -82,6 +161,29 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
}
+
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ if (numa_state->nodes[i].has_cpu) {
+ initiator_list[num_initiator++] = i;
+ }
+ }
+
+ /*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+ for (hierarchy = HMAT_LB_MEM_MEMORY;
+ hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) {
+ for (type = HMAT_LB_DATA_ACCESS_LATENCY;
+ type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
+ hmat_lb = numa_state->hmat_lb[hierarchy][type];
+
+ if (hmat_lb && hmat_lb->list->len) {
+ build_hmat_lb(table_data, hmat_lb, num_initiator,
+ numa_state->num_nodes, initiator_list);
+ }
+ }
+ }
}
void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)

View File

@ -0,0 +1,32 @@
From: Cathy Zhang <cathy.zhang@intel.com>
Date: Tue, 22 Oct 2019 15:35:26 +0800
Subject: i386: Add MSR feature bit for MDS-NO
Git commit: 77b168d221191156c47fcd8d1c47329dfdb9439e
References: JIRA-SLE-10195
Define MSR_ARCH_CAP_MDS_NO in the IA32_ARCH_CAPABILITIES MSR to allow
CPU models to report the feature when host supports it.
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <1571729728-23284-2-git-send-email-cathy.zhang@intel.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
target/i386/cpu.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index cde2a16b941adeb1123d5d7411f3..39d37e12256069b92c7998590849 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -838,6 +838,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define MSR_ARCH_CAP_RSBA (1U << 2)
#define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3)
#define MSR_ARCH_CAP_SSB_NO (1U << 4)
+#define MSR_ARCH_CAP_MDS_NO (1U << 5)
#define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5)

View File

@ -0,0 +1,35 @@
From: Cathy Zhang <cathy.zhang@intel.com>
Date: Tue, 22 Oct 2019 15:35:27 +0800
Subject: i386: Add macro for stibp
Git commit: 5af514d0cb314f43bc53f2aefb437f6451d64d0c
References: JIRA-SLE-10195
stibp feature is already added through the following commit.
https://github.com/qemu/qemu/commit/0e8916582991b9fd0b94850a8444b8b80d0a0955
Add a macro for it to allow CPU models to report it when host supports.
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <1571729728-23284-3-git-send-email-cathy.zhang@intel.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
target/i386/cpu.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 39d37e12256069b92c7998590849..af282936a785a25f651d0db1a8cf 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -771,6 +771,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3)
/* Speculation Control */
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
+/* Single Thread Indirect Branch Predictors */
+#define CPUID_7_0_EDX_STIBP (1U << 27)
/* Arch Capabilities */
#define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29)
/* Core Capability */

View File

@ -0,0 +1,94 @@
From: Cathy Zhang <cathy.zhang@intel.com>
Date: Tue, 22 Oct 2019 15:35:28 +0800
Subject: i386: Add new CPU model Cooperlake
Git commit: 22a866b6166db5caa4abaa6e656c2a431fa60726
References: JIRA-SLE-10195
Cooper Lake is intel's successor to Cascade Lake, the new
CPU model inherits features from Cascadelake-Server, while
add one platform associated new feature: AVX512_BF16. Meanwhile,
add STIBP for speculative execution.
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <1571729728-23284-4-git-send-email-cathy.zhang@intel.com>
Reviewed-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
target/i386/cpu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 69f518a21a9b625269b15d9e8ad3..de828e29d8d6a35c1f03bc4a456a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3159,6 +3159,66 @@ static X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .name = "Cooperlake",
+ .level = 0xd,
+ .vendor = CPUID_VENDOR_INTEL,
+ .family = 6,
+ .model = 85,
+ .stepping = 10,
+ .features[FEAT_1_EDX] =
+ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
+ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 |
+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 |
+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE |
+ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP |
+ CPUID_EXT2_NX | CPUID_EXT2_SYSCALL,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
+ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
+ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID |
+ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB |
+ CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD |
+ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT,
+ .features[FEAT_7_0_ECX] =
+ CPUID_7_0_ECX_PKU |
+ CPUID_7_0_ECX_AVX512VNNI,
+ .features[FEAT_7_0_EDX] =
+ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_STIBP |
+ CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES,
+ .features[FEAT_ARCH_CAPABILITIES] =
+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO,
+ .features[FEAT_7_1_EAX] =
+ CPUID_7_1_EAX_AVX512_BF16,
+ /*
+ * Missing: XSAVES (not supported by some Linux versions,
+ * including v4.1 to v4.12).
+ * KVM doesn't yet expose any XSAVES state save component,
+ * and the only one defined in Skylake (processor tracing)
+ * probably will block migration anyway.
+ */
+ .features[FEAT_XSAVE] =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
+ .xlevel = 0x80000008,
+ .model_id = "Intel Xeon Processor (Cooperlake)",
+ },
{
.name = "Icelake-Client",
.level = 0xd,

View File

@ -18,10 +18,10 @@ Signed-off-by: Andreas Färber <afaerber@suse.de>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index cde2a16b941adeb1123d5d7411f3..39b9327a64d42bdace0d7346e038 100644
index 594326a7946798aba6ac42415164..5da6b243db2824f79676e4e1bbae 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1928,7 +1928,7 @@ uint64_t cpu_get_tsc(CPUX86State *env);
@@ -1934,7 +1934,7 @@ uint64_t cpu_get_tsc(CPUX86State *env);
/* XXX: This value should match the one returned by CPUID
* and in exec.c */
# if defined(TARGET_X86_64)

View File

@ -0,0 +1,303 @@
From: Tao Xu <tao3.xu@intel.com>
Date: Fri, 13 Dec 2019 09:19:22 +0800
Subject: numa: Extend CLI to provide initiator information for numa nodes
Git commit: 244b3f4485a07c7ce4b7123d6ce9d8c6012756e8
References: JIRA-SLE-10228
In ACPI 6.3 chapter 5.2.27 Heterogeneous Memory Attribute Table (HMAT),
The initiator represents processor which access to memory. And in 5.2.27.3
Memory Proximity Domain Attributes Structure, the attached initiator is
defined as where the memory controller responsible for a memory proximity
domain. With attached initiator information, the topology of heterogeneous
memory can be described. Add new machine property 'hmat' to enable all
HMAT specific options.
Extend CLI of "-numa node" option to indicate the initiator numa node-id.
In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables. Before using initiator option, enable HMAT with
-machine hmat=on.
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-2-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
hw/core/machine.c | 64 +++++++++++++++++++++++++++++++++++++++++++
hw/core/numa.c | 23 ++++++++++++++++
include/sysemu/numa.h | 5 ++++
qapi/machine.json | 10 ++++++-
qemu-options.hx | 35 +++++++++++++++++++----
5 files changed, 131 insertions(+), 6 deletions(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1689ad3bf8afd18f0e774ed41a8d..d7d2cfa66d58babbc723e19c9172 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -518,6 +518,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp)
ms->nvdimms_state->is_enabled = value;
}
+static bool machine_get_hmat(Object *obj, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ return ms->numa_state->hmat_enabled;
+}
+
+static void machine_set_hmat(Object *obj, bool value, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ ms->numa_state->hmat_enabled = value;
+}
+
static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -645,6 +659,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
+ NodeInfo *numa_info = machine->numa_state->nodes;
bool match = false;
int i;
@@ -714,6 +729,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
match = true;
slot->props.node_id = props->node_id;
slot->props.has_node_id = props->has_node_id;
+
+ if (machine->numa_state->hmat_enabled) {
+ if ((numa_info[props->node_id].initiator < MAX_NODES) &&
+ (props->node_id != numa_info[props->node_id].initiator)) {
+ error_setg(errp, "The initiator of CPU NUMA node %" PRId64
+ " should be itself", props->node_id);
+ return;
+ }
+ numa_info[props->node_id].has_cpu = true;
+ numa_info[props->node_id].initiator = props->node_id;
+ }
}
if (!match) {
@@ -960,6 +986,13 @@ static void machine_initfn(Object *obj)
if (mc->numa_mem_supported) {
ms->numa_state = g_new0(NumaState, 1);
+ object_property_add_bool(obj, "hmat",
+ machine_get_hmat, machine_set_hmat,
+ &error_abort);
+ object_property_set_description(obj, "hmat",
+ "Set on/off to enable/disable "
+ "ACPI Heterogeneous Memory Attribute "
+ "Table (HMAT)", NULL);
}
/* Register notifier when init is done for sysbus sanity checks */
@@ -1048,6 +1081,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
return g_string_free(s, false);
}
+static void numa_validate_initiator(NumaState *numa_state)
+{
+ int i;
+ NodeInfo *numa_info = numa_state->nodes;
+
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ if (numa_info[i].initiator == MAX_NODES) {
+ error_report("The initiator of NUMA node %d is missing, use "
+ "'-numa node,initiator' option to declare it", i);
+ exit(1);
+ }
+
+ if (!numa_info[numa_info[i].initiator].present) {
+ error_report("NUMA node %" PRIu16 " is missing, use "
+ "'-numa node' option to declare it first",
+ numa_info[i].initiator);
+ exit(1);
+ }
+
+ if (!numa_info[numa_info[i].initiator].has_cpu) {
+ error_report("The initiator of NUMA node %d is invalid", i);
+ exit(1);
+ }
+ }
+}
+
static void machine_numa_finish_cpu_init(MachineState *machine)
{
int i;
@@ -1088,6 +1147,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine)
machine_set_cpu_numa_node(machine, &props, &error_fatal);
}
}
+
+ if (machine->numa_state->hmat_enabled) {
+ numa_validate_initiator(machine->numa_state);
+ }
+
if (s->len && !qtest_enabled()) {
warn_report("CPU(s) not present in any NUMA nodes: %s",
s->str);
diff --git a/hw/core/numa.c b/hw/core/numa.c
index e3332a984f7c9639b2a058ac9ac7..e60da99293b4d19c090711659928 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -133,6 +133,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
}
+
+ /*
+ * If not set the initiator, set it to MAX_NODES. And if
+ * HMAT is enabled and this node has no cpus, QEMU will raise error.
+ */
+ numa_info[nodenr].initiator = MAX_NODES;
+ if (node->has_initiator) {
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ if (node->initiator >= MAX_NODES) {
+ error_report("The initiator id %" PRIu16 " expects an integer "
+ "between 0 and %d", node->initiator,
+ MAX_NODES - 1);
+ return;
+ }
+
+ numa_info[nodenr].initiator = node->initiator;
+ }
numa_info[nodenr].present = true;
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
ms->numa_state->num_nodes++;
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index ae9c41d02ba47c089d19d74b3a4f..788cbec7a2096e262555ac6e83cb 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -18,6 +18,8 @@ struct NodeInfo {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
+ bool has_cpu;
+ uint16_t initiator;
uint8_t distance[MAX_NODES];
};
@@ -33,6 +35,9 @@ struct NumaState {
/* Allow setting NUMA distance for different NUMA nodes */
bool have_numa_distance;
+ /* Detect if HMAT support is enabled. */
+ bool hmat_enabled;
+
/* NUMA nodes information */
NodeInfo nodes[MAX_NODES];
};
diff --git a/qapi/machine.json b/qapi/machine.json
index ca26779f1a3623e86befc00ee8d8..27d0e375342a502c7676d23837a7 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -463,6 +463,13 @@
# @memdev: memory backend object. If specified for one node,
# it must be specified for all nodes.
#
+# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145,
+# points to the nodeid which has the memory controller
+# responsible for this NUMA node. This field provides
+# additional information as to the initiator node that
+# is closest (as in directly attached) to this node, and
+# therefore has the best performance (since 5.0)
+#
# Since: 2.1
##
{ 'struct': 'NumaNodeOptions',
@@ -470,7 +477,8 @@
'*nodeid': 'uint16',
'*cpus': ['uint16'],
'*mem': 'size',
- '*memdev': 'str' }}
+ '*memdev': 'str',
+ '*initiator': 'uint16' }}
##
# @NumaDistOptions:
diff --git a/qemu-options.hx b/qemu-options.hx
index 65c9473b7325545c00befcbac651..63f6b33322f10cf33ab900eb292c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -43,7 +43,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
" nvdimm=on|off controls NVDIMM support (default=off)\n"
" enforce-config-section=on|off enforce configuration section migration (default=off)\n"
- " memory-encryption=@var{} memory encryption object to use (default=none)\n",
+ " memory-encryption=@var{} memory encryption object to use (default=none)\n"
+ " hmat=on|off controls ACPI HMAT support (default=off)\n",
QEMU_ARCH_ALL)
STEXI
@item -machine [type=]@var{name}[,prop=@var{value}[,...]]
@@ -103,6 +104,9 @@ NOTE: this parameter is deprecated. Please use @option{-global}
@option{migration.send-configuration}=@var{on|off} instead.
@item memory-encryption=@var{}
Memory encryption object to use. The default is none.
+@item hmat=on|off
+Enables or disables ACPI Heterogeneous Memory Attribute Table (HMAT) support.
+The default is off.
@end table
ETEXI
@@ -161,14 +165,14 @@ If any on the three values is given, the total number of CPUs @var{n} can be omi
ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
- "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
- "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+ "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
+ "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n"
"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
QEMU_ARCH_ALL)
STEXI
-@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
-@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
+@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
+@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
@findex -numa
@@ -215,6 +219,27 @@ split equally between them.
@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore,
if one node uses @samp{memdev}, all of them have to use it.
+@samp{initiator} is an additional option that points to an @var{initiator}
+NUMA node that has best performance (the lowest latency or largest bandwidth)
+to this NUMA @var{node}. Note that this option can be set only when
+the machine property 'hmat' is set to 'on'.
+
+Following example creates a machine with 2 NUMA nodes, node 0 has CPU.
+node 1 has only memory, and its initiator is node 0. Note that because
+node 0 has CPU, by default the initiator of node 0 is itself and must be
+itself.
+@example
+-machine hmat=on \
+-m 2G,slots=2,maxmem=4G \
+-object memory-backend-ram,size=1G,id=m0 \
+-object memory-backend-ram,size=1G,id=m1 \
+-numa node,nodeid=0,memdev=m0 \
+-numa node,nodeid=1,memdev=m1,initiator=0 \
+-smp 2,sockets=2,maxcpus=2 \
+-numa cpu,node-id=0,socket-id=0 \
+-numa cpu,node-id=0,socket-id=1
+@end example
+
@var{source} and @var{destination} are NUMA node IDs.
@var{distance} is the NUMA distance from @var{source} to @var{destination}.
The distance from a node to itself is always 10. If any pair of nodes is

View File

@ -0,0 +1,530 @@
From: Liu Jingqi <jingqi.liu@intel.com>
Date: Fri, 13 Dec 2019 09:19:23 +0800
Subject: numa: Extend CLI to provide memory latency and bandwidth information
Git commit: 9b12dfa03a94d7f7a4b54eb67229a31e58193384
References: JIRA-SLE-10228
Add -numa hmat-lb option to provide System Locality Latency and
Bandwidth Information. These memory attributes help to build
System Locality Latency and Bandwidth Information Structure(s)
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
hmat-lb option, enable HMAT with -machine hmat=on.
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-3-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
hw/core/numa.c | 194 ++++++++++++++++++++++++++++++++++++++++++
include/sysemu/numa.h | 53 ++++++++++++
qapi/machine.json | 93 +++++++++++++++++++-
qemu-options.hx | 47 +++++++++-
4 files changed, 384 insertions(+), 3 deletions(-)
diff --git a/hw/core/numa.c b/hw/core/numa.c
index e60da99293b4d19c090711659928..34eb413f5d58a6feb11214ecc061 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
@@ -198,6 +199,186 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
ms->numa_state->have_numa_distance = true;
}
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+ Error **errp)
+{
+ int i, first_bit, last_bit;
+ uint64_t max_entry, temp_base, bitmap_copy;
+ NodeInfo *numa_info = numa_state->nodes;
+ HMAT_LB_Info *hmat_lb =
+ numa_state->hmat_lb[node->hierarchy][node->data_type];
+ HMAT_LB_Data lb_data = {};
+ HMAT_LB_Data *lb_temp;
+
+ /* Error checking */
+ if (node->initiator > numa_state->num_nodes) {
+ error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+ node->initiator, numa_state->num_nodes);
+ return;
+ }
+ if (node->target > numa_state->num_nodes) {
+ error_setg(errp, "Invalid target=%d, it should be less than %d",
+ node->target, numa_state->num_nodes);
+ return;
+ }
+ if (!numa_info[node->initiator].has_cpu) {
+ error_setg(errp, "Invalid initiator=%d, it isn't an "
+ "initiator proximity domain", node->initiator);
+ return;
+ }
+ if (!numa_info[node->target].present) {
+ error_setg(errp, "The target=%d should point to an existing node",
+ node->target);
+ return;
+ }
+
+ if (!hmat_lb) {
+ hmat_lb = g_malloc0(sizeof(*hmat_lb));
+ numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+ hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+ }
+ hmat_lb->hierarchy = node->hierarchy;
+ hmat_lb->data_type = node->data_type;
+ lb_data.initiator = node->initiator;
+ lb_data.target = node->target;
+
+ if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+ /* Input latency data */
+
+ if (!node->has_latency) {
+ error_setg(errp, "Missing 'latency' option");
+ return;
+ }
+ if (node->has_bandwidth) {
+ error_setg(errp, "Invalid option 'bandwidth' since "
+ "the data type is latency");
+ return;
+ }
+
+ /* Detect duplicate configuration */
+ for (i = 0; i < hmat_lb->list->len; i++) {
+ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+ if (node->initiator == lb_temp->initiator &&
+ node->target == lb_temp->target) {
+ error_setg(errp, "Duplicate configuration of the latency for "
+ "initiator=%d and target=%d", node->initiator,
+ node->target);
+ return;
+ }
+ }
+
+ hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+ if (node->latency) {
+ /* Calculate the temporary base and compressed latency */
+ max_entry = node->latency;
+ temp_base = 1;
+ while (QEMU_IS_ALIGNED(max_entry, 10)) {
+ max_entry /= 10;
+ temp_base *= 10;
+ }
+
+ /* Calculate the max compressed latency */
+ temp_base = MIN(hmat_lb->base, temp_base);
+ max_entry = node->latency / hmat_lb->base;
+ max_entry = MAX(hmat_lb->range_bitmap, max_entry);
+
+ /*
+ * For latency hmat_lb->range_bitmap record the max compressed
+ * latency which should be less than 0xFFFF (UINT16_MAX)
+ */
+ if (max_entry >= UINT16_MAX) {
+ error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
+ "target=%d should not differ from previously entered "
+ "min or max values on more than %d", node->latency,
+ node->initiator, node->target, UINT16_MAX - 1);
+ return;
+ } else {
+ hmat_lb->base = temp_base;
+ hmat_lb->range_bitmap = max_entry;
+ }
+
+ /*
+ * Set lb_info_provided bit 0 as 1,
+ * latency information is provided
+ */
+ numa_info[node->target].lb_info_provided |= BIT(0);
+ }
+ lb_data.data = node->latency;
+ } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
+ /* Input bandwidth data */
+ if (!node->has_bandwidth) {
+ error_setg(errp, "Missing 'bandwidth' option");
+ return;
+ }
+ if (node->has_latency) {
+ error_setg(errp, "Invalid option 'latency' since "
+ "the data type is bandwidth");
+ return;
+ }
+ if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
+ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
+ "target=%d should be 1MB aligned", node->bandwidth,
+ node->initiator, node->target);
+ return;
+ }
+
+ /* Detect duplicate configuration */
+ for (i = 0; i < hmat_lb->list->len; i++) {
+ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+ if (node->initiator == lb_temp->initiator &&
+ node->target == lb_temp->target) {
+ error_setg(errp, "Duplicate configuration of the bandwidth for "
+ "initiator=%d and target=%d", node->initiator,
+ node->target);
+ return;
+ }
+ }
+
+ hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
+
+ if (node->bandwidth) {
+ /* Keep bitmap unchanged when bandwidth out of range */
+ bitmap_copy = hmat_lb->range_bitmap;
+ bitmap_copy |= node->bandwidth;
+ first_bit = ctz64(bitmap_copy);
+ temp_base = UINT64_C(1) << first_bit;
+ max_entry = node->bandwidth / temp_base;
+ last_bit = 64 - clz64(bitmap_copy);
+
+ /*
+ * For bandwidth, first_bit record the base unit of bandwidth bits,
+ * last_bit record the last bit of the max bandwidth. The max
+ * compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
+ */
+ if ((last_bit - first_bit) > UINT16_BITS ||
+ max_entry >= UINT16_MAX) {
+ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
+ "and target=%d should not differ from previously "
+ "entered values on more than %d", node->bandwidth,
+ node->initiator, node->target, UINT16_MAX - 1);
+ return;
+ } else {
+ hmat_lb->base = temp_base;
+ hmat_lb->range_bitmap = bitmap_copy;
+ }
+
+ /*
+ * Set lb_info_provided bit 1 as 1,
+ * bandwidth information is provided
+ */
+ numa_info[node->target].lb_info_provided |= BIT(1);
+ }
+ lb_data.data = node->bandwidth;
+ } else {
+ assert(0);
+ }
+
+ g_array_append_val(hmat_lb->list, lb_data);
+}
+
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
{
Error *err = NULL;
@@ -236,6 +417,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
&err);
break;
+ case NUMA_OPTIONS_TYPE_HMAT_LB:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
+ if (err) {
+ goto end;
+ }
+ break;
default:
abort();
}
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 788cbec7a2096e262555ac6e83cb..70f93c83d71eb2cdab5bf1dde422 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -14,11 +14,34 @@ struct CPUArchId;
#define NUMA_DISTANCE_MAX 254
#define NUMA_DISTANCE_UNREACHABLE 255
+/* the value of AcpiHmatLBInfo flags */
+enum {
+ HMAT_LB_MEM_MEMORY = 0,
+ HMAT_LB_MEM_CACHE_1ST_LEVEL = 1,
+ HMAT_LB_MEM_CACHE_2ND_LEVEL = 2,
+ HMAT_LB_MEM_CACHE_3RD_LEVEL = 3,
+ HMAT_LB_LEVELS /* must be the last entry */
+};
+
+/* the value of AcpiHmatLBInfo data type */
+enum {
+ HMAT_LB_DATA_ACCESS_LATENCY = 0,
+ HMAT_LB_DATA_READ_LATENCY = 1,
+ HMAT_LB_DATA_WRITE_LATENCY = 2,
+ HMAT_LB_DATA_ACCESS_BANDWIDTH = 3,
+ HMAT_LB_DATA_READ_BANDWIDTH = 4,
+ HMAT_LB_DATA_WRITE_BANDWIDTH = 5,
+ HMAT_LB_TYPES /* must be the last entry */
+};
+
+#define UINT16_BITS 16
+
struct NodeInfo {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
bool has_cpu;
+ uint8_t lb_info_provided;
uint16_t initiator;
uint8_t distance[MAX_NODES];
};
@@ -28,6 +51,31 @@ struct NumaNodeMem {
uint64_t node_plugged_mem;
};
+struct HMAT_LB_Data {
+ uint8_t initiator;
+ uint8_t target;
+ uint64_t data;
+};
+typedef struct HMAT_LB_Data HMAT_LB_Data;
+
+struct HMAT_LB_Info {
+ /* Indicates it's memory or the specified level memory side cache. */
+ uint8_t hierarchy;
+
+ /* Present the type of data, access/read/write latency or bandwidth. */
+ uint8_t data_type;
+
+ /* The range bitmap of bandwidth for calculating common base */
+ uint64_t range_bitmap;
+
+ /* The common base unit for latencies or bandwidths */
+ uint64_t base;
+
+ /* Array to store the latencies or bandwidths */
+ GArray *list;
+};
+typedef struct HMAT_LB_Info HMAT_LB_Info;
+
struct NumaState {
/* Number of NUMA nodes */
int num_nodes;
@@ -40,11 +88,16 @@ struct NumaState {
/* NUMA nodes information */
NodeInfo nodes[MAX_NODES];
+
+ /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
+ HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
};
typedef struct NumaState NumaState;
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
void parse_numa_opts(MachineState *ms);
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+ Error **errp);
void numa_complete_configuration(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
extern QemuOptsList qemu_numa_opts;
diff --git a/qapi/machine.json b/qapi/machine.json
index 27d0e375342a502c7676d23837a7..cf8faf5a2a4929560c852bf8d50c 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -426,10 +426,12 @@
#
# @cpu: property based CPU(s) to node mapping (Since: 2.10)
#
+# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
+#
# Since: 2.1
##
{ 'enum': 'NumaOptionsType',
- 'data': [ 'node', 'dist', 'cpu' ] }
+ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
##
# @NumaOptions:
@@ -444,7 +446,8 @@
'data': {
'node': 'NumaNodeOptions',
'dist': 'NumaDistOptions',
- 'cpu': 'NumaCpuOptions' }}
+ 'cpu': 'NumaCpuOptions',
+ 'hmat-lb': 'NumaHmatLBOptions' }}
##
# @NumaNodeOptions:
@@ -557,6 +560,92 @@
'base': 'CpuInstanceProperties',
'data' : {} }
+##
+# @HmatLBMemoryHierarchy:
+#
+# The memory hierarchy in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBMemoryHierarchy, see chapter
+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
+#
+# @memory: the structure represents the memory performance
+#
+# @first-level: first level of memory side cache
+#
+# @second-level: second level of memory side cache
+#
+# @third-level: third level of memory side cache
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBMemoryHierarchy',
+ 'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
+
+##
+# @HmatLBDataType:
+#
+# Data type in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBDataType, see chapter
+# 5.2.27.4: Table 5-146: Field "Data Type" of ACPI 6.3 spec.
+#
+# @access-latency: access latency (nanoseconds)
+#
+# @read-latency: read latency (nanoseconds)
+#
+# @write-latency: write latency (nanoseconds)
+#
+# @access-bandwidth: access bandwidth (Bytes per second)
+#
+# @read-bandwidth: read bandwidth (Bytes per second)
+#
+# @write-bandwidth: write bandwidth (Bytes per second)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBDataType',
+ 'data': [ 'access-latency', 'read-latency', 'write-latency',
+ 'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
+
+##
+# @NumaHmatLBOptions:
+#
+# Set the system locality latency and bandwidth information
+# between Initiator and Target proximity Domains.
+#
+# For more information about @NumaHmatLBOptions, see chapter
+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
+#
+# @initiator: the Initiator Proximity Domain.
+#
+# @target: the Target Proximity Domain.
+#
+# @hierarchy: the Memory Hierarchy. Indicates the performance
+# of memory or side cache.
+#
+# @data-type: presents the type of data, access/read/write
+# latency or hit latency.
+#
+# @latency: the value of latency from @initiator to @target
+# proximity domain, the latency unit is "ns(nanosecond)".
+#
+# @bandwidth: the value of bandwidth between @initiator and @target
+# proximity domain, the bandwidth unit is
+# "Bytes per second".
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatLBOptions',
+ 'data': {
+ 'initiator': 'uint16',
+ 'target': 'uint16',
+ 'hierarchy': 'HmatLBMemoryHierarchy',
+ 'data-type': 'HmatLBDataType',
+ '*latency': 'uint64',
+ '*bandwidth': 'size' }}
+
##
# @HostMemPolicy:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index 63f6b33322f10cf33ab900eb292c..c45e2ae513769f59aa6a61b7d67d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -168,16 +168,19 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n"
- "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
+ "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
+ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
QEMU_ARCH_ALL)
STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
+@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}]
@findex -numa
Define a NUMA node and assign RAM and VCPUs to it.
Set the NUMA distance from a source node to a destination node.
+Set the ACPI Heterogeneous Memory Attributes for the given nodes.
Legacy VCPU assignment uses @samp{cpus} option where
@var{firstcpu} and @var{lastcpu} are CPU indexes. Each
@@ -256,6 +259,48 @@ specified resources, it just assigns existing resources to NUMA
nodes. This means that one still has to use the @option{-m},
@option{-smp} options to allocate RAM and VCPUs respectively.
+Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information
+between initiator and target NUMA nodes in ACPI Heterogeneous Attribute Memory Table (HMAT).
+Initiator NUMA node can create memory requests, usually it has one or more processors.
+Target NUMA node contains addressable memory.
+
+In @samp{hmat-lb} option, @var{node} are NUMA node IDs. @var{hierarchy} is the memory
+hierarchy of the target NUMA node: if @var{hierarchy} is 'memory', the structure
+represents the memory performance; if @var{hierarchy} is 'first-level|second-level|third-level',
+this structure represents aggregated performance of memory side caches for each domain.
+@var{type} of 'data-type' is type of data represented by this structure instance:
+if 'hierarchy' is 'memory', 'data-type' is 'access|read|write' latency or 'access|read|write'
+bandwidth of the target memory; if 'hierarchy' is 'first-level|second-level|third-level',
+'data-type' is 'access|read|write' hit latency or 'access|read|write' hit bandwidth of the
+target memory side cache.
+
+@var{lat} is latency value in nanoseconds. @var{bw} is bandwidth value,
+the possible value and units are NUM[M|G|T], mean that the bandwidth value are
+NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix).
+Note that if latency or bandwidth value is 0, means the corresponding latency or
+bandwidth information is not provided.
+
+For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
+a ram, node 1 has only a ram. The processors in node 0 access memory in node
+0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
+The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
+nanoseconds, access-bandwidth is 100 MB/s.
+@example
+-machine hmat=on \
+-m 2G \
+-object memory-backend-ram,size=1G,id=m0 \
+-object memory-backend-ram,size=1G,id=m1 \
+-smp 2 \
+-numa node,nodeid=0,memdev=m0 \
+-numa node,nodeid=1,memdev=m1,initiator=0 \
+-numa cpu,node-id=0,socket-id=0 \
+-numa cpu,node-id=0,socket-id=1 \
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
+@end example
+
ETEXI
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,

View File

@ -0,0 +1,311 @@
From: Liu Jingqi <jingqi.liu@intel.com>
Date: Fri, 13 Dec 2019 09:19:24 +0800
Subject: numa: Extend CLI to provide memory side cache information
Git commit: c412a48d4d91e8f8b89aae02de0f44f1f0b729e5
References: JIRA-SLE-10228
Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-4-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Bruce Rogers brogers@suse.com>
---
hw/core/numa.c | 80 ++++++++++++++++++++++++++++++++++++++++++
include/sysemu/numa.h | 5 +++
qapi/machine.json | 81 +++++++++++++++++++++++++++++++++++++++++--
qemu-options.hx | 17 +++++++--
4 files changed, 179 insertions(+), 4 deletions(-)
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 34eb413f5d58a6feb11214ecc061..747c9680b02837baa309475ca265 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -379,6 +379,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
g_array_append_val(hmat_lb->list, lb_data);
}
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+ Error **errp)
+{
+ int nb_numa_nodes = ms->numa_state->num_nodes;
+ NodeInfo *numa_info = ms->numa_state->nodes;
+ NumaHmatCacheOptions *hmat_cache = NULL;
+
+ if (node->node_id >= nb_numa_nodes) {
+ error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+ "than %d", node->node_id, nb_numa_nodes);
+ return;
+ }
+
+ if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+ error_setg(errp, "The latency and bandwidth information of "
+ "node-id=%" PRIu32 " should be provided before memory side "
+ "cache attributes", node->node_id);
+ return;
+ }
+
+ if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
+ error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
+ "and less than or equal to %d", node->level,
+ HMAT_LB_LEVELS - 1);
+ return;
+ }
+
+ assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
+ assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+ if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+ error_setg(errp, "Duplicate configuration of the side cache for "
+ "node-id=%" PRIu32 " and level=%" PRIu8,
+ node->node_id, node->level);
+ return;
+ }
+
+ if ((node->level > 1) &&
+ ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+ (node->size >=
+ ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
+ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+ " should be less than the size(%" PRIu64 ") of "
+ "level=%u", node->size, node->level,
+ ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+ node->level - 1);
+ return;
+ }
+
+ if ((node->level < HMAT_LB_LEVELS - 1) &&
+ ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
+ (node->size <=
+ ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
+ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+ " should be larger than the size(%" PRIu64 ") of "
+ "level=%u", node->size, node->level,
+ ms->numa_state->hmat_cache[node->node_id]
+ [node->level + 1]->size,
+ node->level + 1);
+ return;
+ }
+
+ hmat_cache = g_malloc0(sizeof(*hmat_cache));
+ memcpy(hmat_cache, node, sizeof(*hmat_cache));
+ ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
+}
+
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
{
Error *err = NULL;
@@ -430,6 +497,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
goto end;
}
break;
+ case NUMA_OPTIONS_TYPE_HMAT_CACHE:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
+ if (err) {
+ goto end;
+ }
+ break;
default:
abort();
}
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 70f93c83d71eb2cdab5bf1dde422..ba693cc80b780ecccd49a4fa9145 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -91,6 +91,9 @@ struct NumaState {
/* NUMA nodes HMAT Locality Latency and Bandwidth Information */
HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
+
+ /* Memory Side Cache Information Structure */
+ NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
};
typedef struct NumaState NumaState;
@@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
void parse_numa_opts(MachineState *ms);
void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
Error **errp);
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+ Error **errp);
void numa_complete_configuration(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
extern QemuOptsList qemu_numa_opts;
diff --git a/qapi/machine.json b/qapi/machine.json
index cf8faf5a2a4929560c852bf8d50c..b3d30bc8162da9a0b60005fdd86b 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -428,10 +428,12 @@
#
# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
#
+# @hmat-cache: memory side cache information (Since: 5.0)
+#
# Since: 2.1
##
{ 'enum': 'NumaOptionsType',
- 'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] }
+ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
##
# @NumaOptions:
@@ -447,7 +449,8 @@
'node': 'NumaNodeOptions',
'dist': 'NumaDistOptions',
'cpu': 'NumaCpuOptions',
- 'hmat-lb': 'NumaHmatLBOptions' }}
+ 'hmat-lb': 'NumaHmatLBOptions',
+ 'hmat-cache': 'NumaHmatCacheOptions' }}
##
# @NumaNodeOptions:
@@ -646,6 +649,80 @@
'*latency': 'uint64',
'*bandwidth': 'size' }}
+##
+# @HmatCacheAssociativity:
+#
+# Cache associativity in the Memory Side Cache Information Structure
+# of HMAT
+#
+# For more information of @HmatCacheAssociativity, see chapter
+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
+#
+# @none: None (no memory side cache in this proximity domain,
+# or cache associativity unknown)
+#
+# @direct: Direct Mapped
+#
+# @complex: Complex Cache Indexing (implementation specific)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheAssociativity',
+ 'data': [ 'none', 'direct', 'complex' ] }
+
+##
+# @HmatCacheWritePolicy:
+#
+# Cache write policy in the Memory Side Cache Information Structure
+# of HMAT
+#
+# For more information of @HmatCacheWritePolicy, see chapter
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @none: None (no memory side cache in this proximity domain,
+# or cache write policy unknown)
+#
+# @write-back: Write Back (WB)
+#
+# @write-through: Write Through (WT)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheWritePolicy',
+ 'data': [ 'none', 'write-back', 'write-through' ] }
+
+##
+# @NumaHmatCacheOptions:
+#
+# Set the memory side cache information for a given memory domain.
+#
+# For more information of @NumaHmatCacheOptions, see chapter
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @node-id: the memory proximity domain to which the memory belongs.
+#
+# @size: the size of memory side cache in bytes.
+#
+# @level: the cache level described in this structure.
+#
+# @associativity: the cache associativity,
+# none/direct-mapped/complex(complex cache indexing).
+#
+# @policy: the write policy, none/write-back/write-through.
+#
+# @line: the cache Line size in bytes.
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatCacheOptions',
+ 'data': {
+ 'node-id': 'uint32',
+ 'size': 'size',
+ 'level': 'uint8',
+ 'associativity': 'HmatCacheAssociativity',
+ 'policy': 'HmatCacheWritePolicy',
+ 'line': 'uint16' }}
+
##
# @HostMemPolicy:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index c45e2ae513769f59aa6a61b7d67d..16e8888fccb59212bcbb078cd98e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -169,7 +169,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n"
"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
- "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n",
+ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
+ "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
QEMU_ARCH_ALL)
STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
@@ -177,6 +178,7 @@ STEXI
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}]
+@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
@findex -numa
Define a NUMA node and assign RAM and VCPUs to it.
Set the NUMA distance from a source node to a destination node.
@@ -280,11 +282,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix).
Note that if latency or bandwidth value is 0, means the corresponding latency or
bandwidth information is not provided.
+In @samp{hmat-cache} option, @var{node-id} is the NUMA-id of the memory belongs.
+@var{size} is the size of memory side cache in bytes. @var{level} is the cache
+level described in this structure, note that the cache level 0 should not be used
+with @samp{hmat-cache} option. @var{associativity} is the cache associativity,
+the possible value is 'none/direct(direct-mapped)/complex(complex cache indexing)'.
+@var{policy} is the write policy. @var{line} is the cache Line size in bytes.
+
For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
a ram, node 1 has only a ram. The processors in node 0 access memory in node
0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
nanoseconds, access-bandwidth is 100 MB/s.
+And for memory side cache information, NUMA node 0 and 1 both have 1 level memory
+cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes:
@example
-machine hmat=on \
-m 2G \
@@ -298,7 +309,9 @@ nanoseconds, access-bandwidth is 100 MB/s.
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
--numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
+-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
+-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
@end example
ETEXI

View File

@ -1,3 +1,22 @@
-------------------------------------------------------------------
Thu Jan 9 17:48:25 UTC 2020 - Bruce Rogers <brogers@suse.com>
- Add Cooperlake vcpu model (jira-SLE-10195)
i386-Add-MSR-feature-bit-for-MDS-NO.patch
i386-Add-macro-for-stibp.patch
i386-Add-new-CPU-model-Cooperlake.patch
target-i386-Add-new-bit-definitions-of-M.patch
target-i386-Add-missed-features-to-Coope.patch
- Add HMAT support (jira-SLE-10228) (the test case for this series
isn't included because we aren't set up to handle binary patches)
numa-Extend-CLI-to-provide-initiator-inf.patch
numa-Extend-CLI-to-provide-memory-latenc.patch
numa-Extend-CLI-to-provide-memory-side-c.patch
hmat-acpi-Build-Memory-Proximity-Domain-.patch
hmat-acpi-Build-System-Locality-Latency-.patch
hmat-acpi-Build-Memory-Side-Cache-Inform.patch
tests-numa-Add-case-for-QMP-build-HMAT.patch
-------------------------------------------------------------------
Thu Dec 12 19:05:29 UTC 2019 - Bruce Rogers <brogers@suse.com>

108
qemu.spec
View File

@ -1,7 +1,7 @@
#
# spec file for package qemu
#
# Copyright (c) 2019 SUSE LLC
# Copyright (c) 2020 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@ -125,47 +125,59 @@ Source303: README.PACKAGING
# This patch queue is auto-generated - see README.PACKAGING for process
# Patches applied in base project:
Patch00000: XXX-dont-dump-core-on-sigabort.patch
Patch00001: qemu-binfmt-conf-Modify-default-path.patch
Patch00002: qemu-cvs-gettimeofday.patch
Patch00003: qemu-cvs-ioctl_debug.patch
Patch00004: qemu-cvs-ioctl_nodirection.patch
Patch00005: linux-user-add-binfmt-wrapper-for-argv-0.patch
Patch00006: PPC-KVM-Disable-mmu-notifier-check.patch
Patch00007: linux-user-binfmt-support-host-binaries.patch
Patch00008: linux-user-Fake-proc-cpuinfo.patch
Patch00009: linux-user-use-target_ulong.patch
Patch00010: Make-char-muxer-more-robust-wrt-small-FI.patch
Patch00011: linux-user-lseek-explicitly-cast-non-set.patch
Patch00012: AIO-Reduce-number-of-threads-for-32bit-h.patch
Patch00013: xen_disk-Add-suse-specific-flush-disable.patch
Patch00014: qemu-bridge-helper-reduce-security-profi.patch
Patch00015: qemu-binfmt-conf-use-qemu-ARCH-binfmt.patch
Patch00016: linux-user-properly-test-for-infinite-ti.patch
Patch00017: roms-Makefile-pass-a-packaging-timestamp.patch
Patch00018: Raise-soft-address-space-limit-to-hard-l.patch
Patch00019: increase-x86_64-physical-bits-to-42.patch
Patch00020: vga-Raise-VRAM-to-16-MiB-for-pc-0.15-and.patch
Patch00021: i8254-Fix-migration-from-SLE11-SP2.patch
Patch00022: acpi_piix4-Fix-migration-from-SLE11-SP2.patch
Patch00023: Switch-order-of-libraries-for-mpath-supp.patch
Patch00024: Make-installed-scripts-explicitly-python.patch
Patch00025: hw-smbios-handle-both-file-formats-regar.patch
Patch00026: xen-add-block-resize-support-for-xen-dis.patch
Patch00027: tests-qemu-iotests-Triple-timeout-of-i-o.patch
Patch00028: tests-Fix-block-tests-to-be-compatible-w.patch
Patch00029: xen-ignore-live-parameter-from-xen-save-.patch
Patch00030: Conditionalize-ui-bitmap-installation-be.patch
Patch00031: tests-change-error-message-in-test-162.patch
Patch00032: hw-usb-hcd-xhci-Fix-GCC-9-build-warning.patch
Patch00033: hw-usb-dev-mtp-Fix-GCC-9-build-warning.patch
Patch00034: hw-intc-exynos4210_gic-provide-more-room.patch
Patch00035: configure-only-populate-roms-if-softmmu.patch
Patch00036: pc-bios-s390-ccw-net-avoid-warning-about.patch
Patch00037: roms-change-cross-compiler-naming-to-be-.patch
Patch00038: tests-Disable-some-block-tests-for-now.patch
Patch00039: test-add-mapping-from-arch-of-i686-to-qe.patch
Patch00040: roms-Makefile-enable-cross-compile-for-b.patch
Patch00000: i386-Add-MSR-feature-bit-for-MDS-NO.patch
Patch00001: i386-Add-macro-for-stibp.patch
Patch00002: i386-Add-new-CPU-model-Cooperlake.patch
Patch00003: numa-Extend-CLI-to-provide-initiator-inf.patch
Patch00004: numa-Extend-CLI-to-provide-memory-latenc.patch
Patch00005: numa-Extend-CLI-to-provide-memory-side-c.patch
Patch00006: hmat-acpi-Build-Memory-Proximity-Domain-.patch
Patch00007: hmat-acpi-Build-System-Locality-Latency-.patch
Patch00008: hmat-acpi-Build-Memory-Side-Cache-Inform.patch
Patch00009: tests-numa-Add-case-for-QMP-build-HMAT.patch
Patch00010: target-i386-Add-new-bit-definitions-of-M.patch
Patch00011: target-i386-Add-missed-features-to-Coope.patch
Patch00012: XXX-dont-dump-core-on-sigabort.patch
Patch00013: qemu-binfmt-conf-Modify-default-path.patch
Patch00014: qemu-cvs-gettimeofday.patch
Patch00015: qemu-cvs-ioctl_debug.patch
Patch00016: qemu-cvs-ioctl_nodirection.patch
Patch00017: linux-user-add-binfmt-wrapper-for-argv-0.patch
Patch00018: PPC-KVM-Disable-mmu-notifier-check.patch
Patch00019: linux-user-binfmt-support-host-binaries.patch
Patch00020: linux-user-Fake-proc-cpuinfo.patch
Patch00021: linux-user-use-target_ulong.patch
Patch00022: Make-char-muxer-more-robust-wrt-small-FI.patch
Patch00023: linux-user-lseek-explicitly-cast-non-set.patch
Patch00024: AIO-Reduce-number-of-threads-for-32bit-h.patch
Patch00025: xen_disk-Add-suse-specific-flush-disable.patch
Patch00026: qemu-bridge-helper-reduce-security-profi.patch
Patch00027: qemu-binfmt-conf-use-qemu-ARCH-binfmt.patch
Patch00028: linux-user-properly-test-for-infinite-ti.patch
Patch00029: roms-Makefile-pass-a-packaging-timestamp.patch
Patch00030: Raise-soft-address-space-limit-to-hard-l.patch
Patch00031: increase-x86_64-physical-bits-to-42.patch
Patch00032: vga-Raise-VRAM-to-16-MiB-for-pc-0.15-and.patch
Patch00033: i8254-Fix-migration-from-SLE11-SP2.patch
Patch00034: acpi_piix4-Fix-migration-from-SLE11-SP2.patch
Patch00035: Switch-order-of-libraries-for-mpath-supp.patch
Patch00036: Make-installed-scripts-explicitly-python.patch
Patch00037: hw-smbios-handle-both-file-formats-regar.patch
Patch00038: xen-add-block-resize-support-for-xen-dis.patch
Patch00039: tests-qemu-iotests-Triple-timeout-of-i-o.patch
Patch00040: tests-Fix-block-tests-to-be-compatible-w.patch
Patch00041: xen-ignore-live-parameter-from-xen-save-.patch
Patch00042: Conditionalize-ui-bitmap-installation-be.patch
Patch00043: tests-change-error-message-in-test-162.patch
Patch00044: hw-usb-hcd-xhci-Fix-GCC-9-build-warning.patch
Patch00045: hw-usb-dev-mtp-Fix-GCC-9-build-warning.patch
Patch00046: hw-intc-exynos4210_gic-provide-more-room.patch
Patch00047: configure-only-populate-roms-if-softmmu.patch
Patch00048: pc-bios-s390-ccw-net-avoid-warning-about.patch
Patch00049: roms-change-cross-compiler-naming-to-be-.patch
Patch00050: tests-Disable-some-block-tests-for-now.patch
Patch00051: test-add-mapping-from-arch-of-i686-to-qe.patch
Patch00052: roms-Makefile-enable-cross-compile-for-b.patch
# Patches applied in roms/seabios/:
Patch01000: seabios-use-python2-explicitly-as-needed.patch
Patch01001: seabios-switch-to-python3-as-needed.patch
@ -913,6 +925,18 @@ This package provides a service file for starting and stopping KSM.
%patch00038 -p1
%patch00039 -p1
%patch00040 -p1
%patch00041 -p1
%patch00042 -p1
%patch00043 -p1
%patch00044 -p1
%patch00045 -p1
%patch00046 -p1
%patch00047 -p1
%patch00048 -p1
%patch00049 -p1
%patch00050 -p1
%patch00051 -p1
%patch00052 -p1
%patch01000 -p1
%patch01001 -p1
%patch01002 -p1

View File

@ -1,7 +1,7 @@
#
# spec file for package qemu
#
# Copyright (c) 2019 SUSE LLC
# Copyright (c) 2020 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed

View File

@ -0,0 +1,88 @@
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 8 Jan 2020 13:32:40 +0100
Subject: target/i386: Add missed features to Cooperlake CPU model
Git-commit: 0000000000000000000000000000000000000000
References: JIRA-SLE-1015
It lacks VMX features and two security feature bits (disclosed recently) in
MSR_IA32_ARCH_CAPABILITIES in current Cooperlake CPU model, so add them.
Fixes: 22a866b6166d ("i386: Add new CPU model Cooperlake")
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-Id: <20191225063018.20038-3-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
target/i386/cpu.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 50 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index de828e29d8d6a35c1f03bc4a456a..b49e47ddf590d4d23683bb47212a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3201,7 +3201,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES,
.features[FEAT_ARCH_CAPABILITIES] =
MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
- MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO,
+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
+ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
.features[FEAT_7_1_EAX] =
CPUID_7_1_EAX_AVX512_BF16,
/*
@@ -3216,6 +3217,54 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_XSAVE_XGETBV1,
.features[FEAT_6_EAX] =
CPUID_6_EAX_ARAT,
+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */
+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS |
+ MSR_VMX_BASIC_TRUE_CTLS,
+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE |
+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT |
+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER,
+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY |
+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB |
+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT |
+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS,
+ .features[FEAT_VMX_EXIT_CTLS] =
+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER |
+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT |
+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT,
+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK |
+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS |
+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR,
+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING |
+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS |
+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING |
+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS |
+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
+ VMX_CPU_BASED_MONITOR_TRAP_FLAG |
+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
+ .features[FEAT_VMX_SECONDARY_CTLS] =
+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT |
+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP |
+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID |
+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML,
+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING,
.xlevel = 0x80000008,
.model_id = "Intel Xeon Processor (Cooperlake)",
},

View File

@ -0,0 +1,44 @@
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 8 Jan 2020 13:32:39 +0100
Subject: target/i386: Add new bit definitions of MSR_IA32_ARCH_CAPABILITIES
Git-commit: 0000000000000000000000000000000000000000
References: JIRA-SLE-1015
The bit 6, 7 and 8 of MSR_IA32_ARCH_CAPABILITIES are recently disclosed
for some security issues. Add the definitions for them to be used by named
CPU models.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-Id: <20191225063018.20038-2-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
target/i386/cpu.h | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index af282936a785a25f651d0db1a8cf..594326a7946798aba6ac42415164 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -835,12 +835,15 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8)
/* MSR Feature Bits */
-#define MSR_ARCH_CAP_RDCL_NO (1U << 0)
-#define MSR_ARCH_CAP_IBRS_ALL (1U << 1)
-#define MSR_ARCH_CAP_RSBA (1U << 2)
+#define MSR_ARCH_CAP_RDCL_NO (1U << 0)
+#define MSR_ARCH_CAP_IBRS_ALL (1U << 1)
+#define MSR_ARCH_CAP_RSBA (1U << 2)
#define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3)
-#define MSR_ARCH_CAP_SSB_NO (1U << 4)
-#define MSR_ARCH_CAP_MDS_NO (1U << 5)
+#define MSR_ARCH_CAP_SSB_NO (1U << 4)
+#define MSR_ARCH_CAP_MDS_NO (1U << 5)
+#define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6)
+#define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7)
+#define MSR_ARCH_CAP_TAA_NO (1U << 8)
#define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5)

View File

@ -0,0 +1,252 @@
From: Tao Xu <tao3.xu@intel.com>
Date: Fri, 13 Dec 2019 09:19:28 +0800
Subject: tests/numa: Add case for QMP build HMAT
Git commit: d00817c944ed15fbe4a61d44fe7f9fe166c7df88
References: JIRA-SLE-10228
Check configuring HMAT usecase
Acked-by: Markus Armbruster <armbru@redhat.com>
Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-8-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
---
tests/numa-test.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 213 insertions(+)
diff --git a/tests/numa-test.c b/tests/numa-test.c
index 8de8581231dd3e3299bc61d40d8d..17dd807d2a4329aea2e96a845edd 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
qtest_quit(qs);
}
+static void pc_hmat_build_cfg(const void *data)
+{
+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+ /* Fail: Initiator should be less than the number of nodes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+ /* Fail: Target should be less than the number of nodes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+ /* Fail: Initiator should contain cpu */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+ /* Fail: Data-type mismatch */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"write-latency\","
+ " 'bandwidth': 524288000 } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\","
+ " 'latency': 5 } }")));
+
+ /* Fail: Bandwidth should be 1MB (1048576) aligned */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 1048575 } }")));
+
+ /* Configuring HMAT bandwidth and latency details */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 1 } }"))); /* 1 ns */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 5 } }"))); /* Fail: Duplicate configuration */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 68717379584 } }"))); /* 65534 MB/s */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 65534 } }"))); /* 65534 ns */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 34358689792 } }"))); /* 32767 MB/s */
+
+ /* Fail: node_id should be less than the number of nodes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 2, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Fail: level should be less than HMAT_LB_LEVELS (4) */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 4, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Fail: associativity option should be 'none', if level is 0 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"direct\", 'policy': \"none\","
+ " 'line': 0 } }")));
+ /* Fail: policy option should be 'none', if level is 0 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"none\", 'policy': \"write-back\","
+ " 'line': 0 } }")));
+ /* Fail: line option should be 0, if level is 0 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"none\", 'policy': \"none\","
+ " 'line': 8 } }")));
+
+ /* Configuring HMAT memory side cache attributes */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }"))); /* Fail: Duplicate configuration */
+ /* Fail: The size of level 2 size should be small than level 1 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 2, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+ /* Fail: The size of level 0 size should be larger than level 1 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 1, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* let machine initialization to complete and run */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
+ "{ 'execute': 'x-exit-preconfig' }")));
+ qtest_qmp_eventwait(qs, "RESUME");
+
+ qtest_quit(qs);
+}
+
+static void pc_hmat_off_cfg(const void *data)
+{
+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0",
+ data ? (char *)data : "");
+
+ /*
+ * Fail: Enable HMAT with -machine hmat=on
+ * before using any of hmat specific options
+ */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\","
+ " 'initiator': 0 } }")));
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\" } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 1 } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* let machine initialization to complete and run */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
+ "{ 'execute': 'x-exit-preconfig' }")));
+ qtest_qmp_eventwait(qs, "RESUME");
+
+ qtest_quit(qs);
+}
+
+static void pc_hmat_erange_cfg(const void *data)
+{
+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+ /* Can't store the compressed latency */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 1 } }"))); /* 1 ns */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 65535 } }"))); /* 65535 ns */
+
+ /* Test the 0 input (bandwidth not provided) */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 0 } }"))); /* 0 MB/s */
+ /* Fail: bandwidth should be provided before memory side cache attributes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Can't store the compressed bandwidth */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 68718428160 } }"))); /* 65535 MB/s */
+
+ /* let machine initialization to complete and run */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
+ "{ 'execute': 'x-exit-preconfig' }")));
+ qtest_qmp_eventwait(qs, "RESUME");
+
+ qtest_quit(qs);
+}
+
int main(int argc, char **argv)
{
const char *args = NULL;
@@ -346,6 +556,9 @@ int main(int argc, char **argv)
if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) {
qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu);
qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg);
+ qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg);
+ qtest_add_data_func("/numa/pc/hmat/off", args, pc_hmat_off_cfg);
+ qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg);
}
if (!strcmp(arch, "ppc64")) {