79c4324644
Change-Id: I2d302dda68298877c65c99147f5bf22186a59aac
284 lines
12 KiB
Diff
284 lines
12 KiB
Diff
From bf4a20a82bd4804842dd2960db30e0be7ecb2d32 Mon Sep 17 00:00:00 2001
|
|
From: Yanan Wang <wangyanan55@huawei.com>
|
|
Date: Tue, 28 Dec 2021 17:22:09 +0800
|
|
Subject: [PATCH 11/24] hw/core/machine: Introduce CPU cluster topology support
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
The new Cluster-Aware Scheduling support has landed in Linux 5.16,
|
|
which has been proved to benefit the scheduling performance (e.g.
|
|
load balance and wake_affine strategy) on both x86_64 and AArch64.
|
|
|
|
So now in Linux 5.16 we have four-level arch-neutral CPU topology
|
|
definition like below and a new scheduler level for clusters.
|
|
struct cpu_topology {
|
|
int thread_id;
|
|
int core_id;
|
|
int cluster_id;
|
|
int package_id;
|
|
int llc_id;
|
|
cpumask_t thread_sibling;
|
|
cpumask_t core_sibling;
|
|
cpumask_t cluster_sibling;
|
|
cpumask_t llc_sibling;
|
|
}
|
|
|
|
A cluster generally means a group of CPU cores which share L2 cache
|
|
or other mid-level resources, and it is the shared resources that
|
|
are used to improve the scheduler's behavior. From the point of view of
|
|
the size range, it's between CPU die and CPU core. For example, on
|
|
some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node,
|
|
and 4 CPU cores in each cluster. The 4 CPU cores share a separate
|
|
L2 cache and an L3 cache tag, which brings a cache affinity advantage.
|
|
|
|
In virtualization, on hosts which have pClusters (physical
|
|
clusters), if we can design a vCPU topology with cluster level for
|
|
guest kernel and have a dedicated vCPU pinning, a Cluster-Aware
|
|
Guest kernel can also make use of the cache affinity of CPU clusters
|
|
to gain similar scheduling performance.
|
|
|
|
This patch adds infrastructure for CPU cluster level topology
|
|
configuration and parsing, so that the user can specify the cluster
|
|
parameter if their machines support it.
|
|
|
|
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
|
|
Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com>
|
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
|
[PMD: Added '(since 7.0)' to @clusters in qapi/machine.json]
|
|
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
|
---
|
|
hw/core/machine-smp.c | 26 +++++++++++++++++++-------
|
|
hw/core/machine.c | 3 +++
|
|
include/hw/boards.h | 6 +++++-
|
|
qapi/machine.json | 5 ++++-
|
|
qemu-options.hx | 7 ++++---
|
|
softmmu/vl.c | 3 +++
|
|
6 files changed, 38 insertions(+), 12 deletions(-)
|
|
|
|
diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
|
|
index 2cbfd57429..b39ed21e65 100644
|
|
--- a/hw/core/machine-smp.c
|
|
+++ b/hw/core/machine-smp.c
|
|
@@ -37,6 +37,10 @@ static char *cpu_hierarchy_to_string(MachineState *ms)
|
|
g_string_append_printf(s, " * dies (%u)", ms->smp.dies);
|
|
}
|
|
|
|
+ if (mc->smp_props.clusters_supported) {
|
|
+ g_string_append_printf(s, " * clusters (%u)", ms->smp.clusters);
|
|
+ }
|
|
+
|
|
g_string_append_printf(s, " * cores (%u)", ms->smp.cores);
|
|
g_string_append_printf(s, " * threads (%u)", ms->smp.threads);
|
|
|
|
@@ -71,6 +75,7 @@ void machine_parse_smp_config(MachineState *ms,
|
|
unsigned cpus = config->has_cpus ? config->cpus : 0;
|
|
unsigned sockets = config->has_sockets ? config->sockets : 0;
|
|
unsigned dies = config->has_dies ? config->dies : 0;
|
|
+ unsigned clusters = config->has_clusters ? config->clusters : 0;
|
|
unsigned cores = config->has_cores ? config->cores : 0;
|
|
unsigned threads = config->has_threads ? config->threads : 0;
|
|
unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
|
|
@@ -82,6 +87,7 @@ void machine_parse_smp_config(MachineState *ms,
|
|
if ((config->has_cpus && config->cpus == 0) ||
|
|
(config->has_sockets && config->sockets == 0) ||
|
|
(config->has_dies && config->dies == 0) ||
|
|
+ (config->has_clusters && config->clusters == 0) ||
|
|
(config->has_cores && config->cores == 0) ||
|
|
(config->has_threads && config->threads == 0) ||
|
|
(config->has_maxcpus && config->maxcpus == 0)) {
|
|
@@ -97,8 +103,13 @@ void machine_parse_smp_config(MachineState *ms,
|
|
error_setg(errp, "dies not supported by this machine's CPU topology");
|
|
return;
|
|
}
|
|
+ if (!mc->smp_props.clusters_supported && clusters > 1) {
|
|
+ error_setg(errp, "clusters not supported by this machine's CPU topology");
|
|
+ return;
|
|
+ }
|
|
|
|
dies = dies > 0 ? dies : 1;
|
|
+ clusters = clusters > 0 ? clusters : 1;
|
|
|
|
/* compute missing values based on the provided ones */
|
|
if (cpus == 0 && maxcpus == 0) {
|
|
@@ -113,41 +124,42 @@ void machine_parse_smp_config(MachineState *ms,
|
|
if (sockets == 0) {
|
|
cores = cores > 0 ? cores : 1;
|
|
threads = threads > 0 ? threads : 1;
|
|
- sockets = maxcpus / (dies * cores * threads);
|
|
+ sockets = maxcpus / (dies * clusters * cores * threads);
|
|
} else if (cores == 0) {
|
|
threads = threads > 0 ? threads : 1;
|
|
- cores = maxcpus / (sockets * dies * threads);
|
|
+ cores = maxcpus / (sockets * dies * clusters * threads);
|
|
}
|
|
} else {
|
|
/* prefer cores over sockets since 6.2 */
|
|
if (cores == 0) {
|
|
sockets = sockets > 0 ? sockets : 1;
|
|
threads = threads > 0 ? threads : 1;
|
|
- cores = maxcpus / (sockets * dies * threads);
|
|
+ cores = maxcpus / (sockets * dies * clusters * threads);
|
|
} else if (sockets == 0) {
|
|
threads = threads > 0 ? threads : 1;
|
|
- sockets = maxcpus / (dies * cores * threads);
|
|
+ sockets = maxcpus / (dies * clusters * cores * threads);
|
|
}
|
|
}
|
|
|
|
/* try to calculate omitted threads at last */
|
|
if (threads == 0) {
|
|
- threads = maxcpus / (sockets * dies * cores);
|
|
+ threads = maxcpus / (sockets * dies * clusters * cores);
|
|
}
|
|
}
|
|
|
|
- maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * cores * threads;
|
|
+ maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * clusters * cores * threads;
|
|
cpus = cpus > 0 ? cpus : maxcpus;
|
|
|
|
ms->smp.cpus = cpus;
|
|
ms->smp.sockets = sockets;
|
|
ms->smp.dies = dies;
|
|
+ ms->smp.clusters = clusters;
|
|
ms->smp.cores = cores;
|
|
ms->smp.threads = threads;
|
|
ms->smp.max_cpus = maxcpus;
|
|
|
|
/* sanity-check of the computed topology */
|
|
- if (sockets * dies * cores * threads != maxcpus) {
|
|
+ if (sockets * dies * clusters * cores * threads != maxcpus) {
|
|
g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
|
|
error_setg(errp, "Invalid CPU topology: "
|
|
"product of the hierarchy must match maxcpus: "
|
|
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
|
index 3993c534b9..a4a2df405f 100644
|
|
--- a/hw/core/machine.c
|
|
+++ b/hw/core/machine.c
|
|
@@ -742,10 +742,12 @@ static void machine_get_smp(Object *obj, Visitor *v, const char *name,
|
|
.has_cpus = true, .cpus = ms->smp.cpus,
|
|
.has_sockets = true, .sockets = ms->smp.sockets,
|
|
.has_dies = true, .dies = ms->smp.dies,
|
|
+ .has_clusters = true, .clusters = ms->smp.clusters,
|
|
.has_cores = true, .cores = ms->smp.cores,
|
|
.has_threads = true, .threads = ms->smp.threads,
|
|
.has_maxcpus = true, .maxcpus = ms->smp.max_cpus,
|
|
};
|
|
+
|
|
if (!visit_type_SMPConfiguration(v, name, &config, &error_abort)) {
|
|
return;
|
|
}
|
|
@@ -932,6 +934,7 @@ static void machine_initfn(Object *obj)
|
|
ms->smp.max_cpus = mc->default_cpus;
|
|
ms->smp.sockets = 1;
|
|
ms->smp.dies = 1;
|
|
+ ms->smp.clusters = 1;
|
|
ms->smp.cores = 1;
|
|
ms->smp.threads = 1;
|
|
}
|
|
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
|
index 7597cec440..f49a2578ea 100644
|
|
--- a/include/hw/boards.h
|
|
+++ b/include/hw/boards.h
|
|
@@ -129,10 +129,12 @@ typedef struct {
|
|
* SMPCompatProps:
|
|
* @prefer_sockets - whether sockets are preferred over cores in smp parsing
|
|
* @dies_supported - whether dies are supported by the machine
|
|
+ * @clusters_supported - whether clusters are supported by the machine
|
|
*/
|
|
typedef struct {
|
|
bool prefer_sockets;
|
|
bool dies_supported;
|
|
+ bool clusters_supported;
|
|
} SMPCompatProps;
|
|
|
|
/**
|
|
@@ -299,7 +301,8 @@ typedef struct DeviceMemoryState {
|
|
* @cpus: the number of present logical processors on the machine
|
|
* @sockets: the number of sockets on the machine
|
|
* @dies: the number of dies in one socket
|
|
- * @cores: the number of cores in one die
|
|
+ * @clusters: the number of clusters in one die
|
|
+ * @cores: the number of cores in one cluster
|
|
* @threads: the number of threads in one core
|
|
* @max_cpus: the maximum number of logical processors on the machine
|
|
*/
|
|
@@ -307,6 +310,7 @@ typedef struct CpuTopology {
|
|
unsigned int cpus;
|
|
unsigned int sockets;
|
|
unsigned int dies;
|
|
+ unsigned int clusters;
|
|
unsigned int cores;
|
|
unsigned int threads;
|
|
unsigned int max_cpus;
|
|
diff --git a/qapi/machine.json b/qapi/machine.json
|
|
index f1839acf20..8faa51074e 100644
|
|
--- a/qapi/machine.json
|
|
+++ b/qapi/machine.json
|
|
@@ -1396,7 +1396,9 @@
|
|
#
|
|
# @dies: number of dies per socket in the CPU topology
|
|
#
|
|
-# @cores: number of cores per die in the CPU topology
|
|
+# @clusters: number of clusters per die in the CPU topology (since 7.0)
|
|
+#
|
|
+# @cores: number of cores per cluster in the CPU topology
|
|
#
|
|
# @threads: number of threads per core in the CPU topology
|
|
#
|
|
@@ -1408,6 +1410,7 @@
|
|
'*cpus': 'int',
|
|
'*sockets': 'int',
|
|
'*dies': 'int',
|
|
+ '*clusters': 'int',
|
|
'*cores': 'int',
|
|
'*threads': 'int',
|
|
'*maxcpus': 'int' } }
|
|
diff --git a/qemu-options.hx b/qemu-options.hx
|
|
index 7a59db7764..0f26f7dad7 100644
|
|
--- a/qemu-options.hx
|
|
+++ b/qemu-options.hx
|
|
@@ -206,13 +206,14 @@ SRST
|
|
ERST
|
|
|
|
DEF("smp", HAS_ARG, QEMU_OPTION_smp,
|
|
- "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,cores=cores][,threads=threads]\n"
|
|
+ "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n"
|
|
" set the number of initial CPUs to 'n' [default=1]\n"
|
|
" maxcpus= maximum number of total CPUs, including\n"
|
|
" offline CPUs for hotplug, etc\n"
|
|
" sockets= number of sockets on the machine board\n"
|
|
" dies= number of dies in one socket\n"
|
|
- " cores= number of cores in one die\n"
|
|
+ " clusters= number of clusters in one die\n"
|
|
+ " cores= number of cores in one cluster\n"
|
|
" threads= number of threads in one core\n"
|
|
"Note: Different machines may have different subsets of the CPU topology\n"
|
|
" parameters supported, so the actual meaning of the supported parameters\n"
|
|
@@ -228,7 +229,7 @@ DEF("smp", HAS_ARG, QEMU_OPTION_smp,
|
|
" must be set as 1 in the purpose of correct parsing.\n",
|
|
QEMU_ARCH_ALL)
|
|
SRST
|
|
-``-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,cores=cores][,threads=threads]``
|
|
+``-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]``
|
|
Simulate a SMP system with '\ ``n``\ ' CPUs initially present on
|
|
the machine type board. On boards supporting CPU hotplug, the optional
|
|
'\ ``maxcpus``\ ' parameter can be set to enable further CPUs to be
|
|
diff --git a/softmmu/vl.c b/softmmu/vl.c
|
|
index 620a1f1367..d9e4c619d3 100644
|
|
--- a/softmmu/vl.c
|
|
+++ b/softmmu/vl.c
|
|
@@ -726,6 +726,9 @@ static QemuOptsList qemu_smp_opts = {
|
|
}, {
|
|
.name = "dies",
|
|
.type = QEMU_OPT_NUMBER,
|
|
+ }, {
|
|
+ .name = "clusters",
|
|
+ .type = QEMU_OPT_NUMBER,
|
|
}, {
|
|
.name = "cores",
|
|
.type = QEMU_OPT_NUMBER,
|
|
--
|
|
2.27.0
|
|
|