110 lines
4.2 KiB
Diff
From f12c0d5c6e84f9409ac3a73c066841a8ff5aab0b Mon Sep 17 00:00:00 2001
|
|
From: Jim Warner <james.warner@comcast.net>
|
|
Date: Fri, 8 Nov 2013 00:00:00 -0600
|
|
Subject: [PATCH] top: minimize the statistics overhead for numa support
|
|
|
|
A recent solution to a potential libnuma corruption problem
|
|
has caused me to reevaluate some associated numa logic
|
|
for efficiency. Here is a summary of the problems that
|
|
exist with current libnuma/user possible interactions:
|
|
|
|
. Whenever the numa library was present extra overhead
|
|
would always be incurred in maintaining the node stats
|
|
even when the '2' or '3' commands were not being used.
|
|
|
|
. As part of such overhead a separate loop was used to
|
|
reinitialize each cpu/node structure with each display
|
|
cycle so that prior accumulated totals were preserved.
|
|
Again, it didn't matter if numa data was really shown.
|
|
|
|
This commit attempts to refocus on the 'critical path'
|
|
costs in a running top by optimizing for the occasions
|
|
when numa node data is not being displayed. Under such
|
|
conditions, no extra overhead will be incurred whether
|
|
or not a distribution has the libnuma library present.
|
|
|
|
To achieve this goal, some additional overhead will be
|
|
incurred, but only when actually displaying numa data.
|
|
And all such new costs have been minimized in spite of
|
|
the gcc inclination to duplicate subscript resolution.
|
|
|
|
Reference(s):
|
|
commit 24bd950cb2e1722d459461f0f9c0c30a4b9ffdaa
|
|
|
|
Signed-off-by: Jim Warner <james.warner@comcast.net>
|
|
---
|
|
top/top.c | 46 +++++++++++++++++++++++-----------------------
|
|
1 file changed, 23 insertions(+), 23 deletions(-)
|
|
|
|
diff --git top/top.c top/top.c
|
|
index e619ddd..9d12693 100644
|
|
--- top/top.c
|
|
+++ top/top.c
|
|
@@ -2361,18 +2361,9 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
|
|
#endif
|
|
|
|
#ifndef NUMA_DISABLE
|
|
- for (i = 0; i < Numa_node_tot; i++) {
|
|
- node = sumSLOT + 1 + i;
|
|
- // remember from last time around
|
|
- memcpy(&cpus[node].sav, &cpus[node].cur, sizeof(CT_t));
|
|
- // initialize current node statistics
|
|
- memset(&cpus[node].cur, 0, sizeof(CT_t));
|
|
-#ifndef CPU_ZEROTICS
|
|
- cpus[node].edge = cpus[sumSLOT].edge;
|
|
- // this is for symmetry only, it's not currently required
|
|
- cpus[node].cur.tot = cpus[sumSLOT].cur.tot;
|
|
-#endif
|
|
- }
|
|
+ // forget all of the prior node statistics (maybe)
|
|
+ if (CHKw(Curwin, View_CPUNOD))
|
|
+ memset(&cpus[sumSLOT + 1], 0, Numa_node_tot * sizeof(CPU_t));
|
|
#endif
|
|
|
|
// now value each separate cpu's tics...
|
|
@@ -2400,21 +2391,30 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
|
|
cpus[i].id = i;
|
|
#endif
|
|
#ifndef NUMA_DISABLE
|
|
- if (Numa_node_tot
|
|
+ /* henceforth, with just a little more arithmetic we can avoid
|
|
+ maintaining *any* node stats unless they're actually needed */
|
|
+ if (CHKw(Curwin, View_CPUNOD)
|
|
+ && Numa_node_tot
|
|
&& -1 < (node = Numa_node_of_cpu(cpus[i].id))) {
|
|
+ // use our own pointer to avoid gcc subscript bloat
|
|
+ CPU_t *nod_ptr = &cpus[sumSLOT + 1 + node];
|
|
+ nod_ptr->cur.u += cpus[i].cur.u; nod_ptr->sav.u += cpus[i].sav.u;
|
|
+ nod_ptr->cur.n += cpus[i].cur.n; nod_ptr->sav.n += cpus[i].sav.n;
|
|
+ nod_ptr->cur.s += cpus[i].cur.s; nod_ptr->sav.s += cpus[i].sav.s;
|
|
+ nod_ptr->cur.i += cpus[i].cur.i; nod_ptr->sav.i += cpus[i].sav.i;
|
|
+ nod_ptr->cur.w += cpus[i].cur.w; nod_ptr->sav.w += cpus[i].sav.w;
|
|
+ nod_ptr->cur.x += cpus[i].cur.x; nod_ptr->sav.x += cpus[i].sav.x;
|
|
+ nod_ptr->cur.y += cpus[i].cur.y; nod_ptr->sav.y += cpus[i].sav.y;
|
|
+ nod_ptr->cur.z += cpus[i].cur.z; nod_ptr->sav.z += cpus[i].sav.z;
|
|
+#ifndef CPU_ZEROTICS
|
|
+ /* yep, we re-value this repeatedly for each cpu encountered, but we
|
|
+ can then avoid a prior loop to selectively initialize each node */
|
|
+ nod_ptr->edge = cpus[sumSLOT].edge;
|
|
+#endif
|
|
cpus[i].node = node;
|
|
- node += (sumSLOT + 1);
|
|
- cpus[node].cur.u += cpus[i].cur.u;
|
|
- cpus[node].cur.n += cpus[i].cur.n;
|
|
- cpus[node].cur.s += cpus[i].cur.s;
|
|
- cpus[node].cur.i += cpus[i].cur.i;
|
|
- cpus[node].cur.w += cpus[i].cur.w;
|
|
- cpus[node].cur.x += cpus[i].cur.x;
|
|
- cpus[node].cur.y += cpus[i].cur.y;
|
|
- cpus[node].cur.z += cpus[i].cur.z;
|
|
}
|
|
#endif
|
|
- }
|
|
+ } // end: for each cpu
|
|
|
|
Cpu_faux_tot = i; // tolerate cpus taken offline
|
|
|
|
--
|
|
1.7.9.2
|
|
|