0440703030
- PE: Demote from Master does not clear previous errors - crmd: Prevent secondary DC fencing resulting from CIB updates that are lost due to elections - crmd: Log duplicate DC detection as a WARNING not ERROR - crmd: Bug lf#2632 - Correctly handle nodes that return faster than stonith - Core: Treat GNUTLS_E_UNEXPECTED_PACKET_LENGTH as normal termination of a TLS session - cib: Call gnutls_bye() and shutdown() when disconnecting from remote TLS connections - cib: Remove disconnected remote connections from mainloop - cib: Attempt a graceful sign-off for remote TLS connections - Core: Ensure there is sufficient space for EOS when building short-form option strings (prevents segfault) - Core: Fix variable expansion in pkg-config files - PE: Resolve memory leak reported by valgrind - PE: Fix memory leak for re-allocated resources reported by valgrind - PE: Improve the merging with template's operations - crmd: Allow nodes to fence themselves if they're the last one standing (lf#2584) - stonith: Add an API call for listing installed agents - stonith: Allow the fencing history to be queried - stonith: Ensure completed operations are recorded as such in the history - stonith: Support --quiet to display just the seconds since epoch at which a node was last shot - stonith: Serialize actions for a given device - stonith: Add missing entries to stonith_error2string() (missing OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/pacemaker?expand=0&rev=18
299 lines
11 KiB
Diff
299 lines
11 KiB
Diff
# HG changeset patch
|
|
# User Dejan Muhamedagic <dejan@hello-penguin.com>
|
|
# Date 1314632951 -7200
|
|
# Node ID ccd0c1e1edf9f23cafb4363014acba755f1b4e25
|
|
# Parent d21f988a419c0c7fa349c4e26f6b500944d91370
|
|
Medium: Shell: several history improvements
|
|
|
|
- add more patterns for fencing
|
|
- better handle the number of PE files reaching the limit
|
|
|
|
diff --git a/doc/crm.8.txt b/doc/crm.8.txt
|
|
--- a/doc/crm.8.txt
|
|
+++ b/doc/crm.8.txt
|
|
@@ -2426,7 +2426,8 @@ Example:
|
|
|
|
The `latest` command shows a bit of recent history, more
|
|
precisely whatever happened since the last cluster change (the
|
|
-latest transition).
|
|
+latest transition). If the transition is running, the shell will
|
|
+first wait until it finishes.
|
|
|
|
Usage:
|
|
...............
|
|
@@ -2540,10 +2541,13 @@ Example:
|
|
setnodes node_a node_b
|
|
...............
|
|
|
|
-[[cmdhelp_history_resource,resource failed actions]]
|
|
+[[cmdhelp_history_resource,resource events]]
|
|
==== `resource`
|
|
|
|
-Show status changes and any failures that happened on a resource.
|
|
+Show actions and any failures that happened on all specified
|
|
+resources on all nodes. Normally, one gives resource names as
|
|
+arguments, but it is also possible to use extended regular
|
|
+expressions.
|
|
|
|
Usage:
|
|
...............
|
|
@@ -2551,14 +2555,17 @@ Usage:
|
|
...............
|
|
Example:
|
|
...............
|
|
- resource mydb
|
|
+ resource bigdb public_ip
|
|
+ resource bigdb:0
|
|
+ resource bigdb:.
|
|
...............
|
|
|
|
[[cmdhelp_history_node,node events]]
|
|
==== `node`
|
|
|
|
Show important events that happened on a node. Important events
|
|
-are node lost and join, standby and online, and fence.
|
|
+are node lost and join, standby and online, and fence. Use either
|
|
+node names or extended regular expressions.
|
|
|
|
Usage:
|
|
...............
|
|
@@ -2572,7 +2579,17 @@ Example:
|
|
[[cmdhelp_history_log,log content]]
|
|
==== `log`
|
|
|
|
-Show logs for a node or combined logs of all nodes.
|
|
+Show messages logged on one or more nodes. Leaving out a node
|
|
+name produces combined logs of all nodes. Messages are sorted by
|
|
+time and, if the terminal emulation supports it, displayed in
|
|
+different colours depending on the node to allow for easier
|
|
+reading.
|
|
+
|
|
+The sorting key is the timestamp as written by syslog which
|
|
+normally has the maximum resolution of one second. Obviously,
|
|
+messages generated by events which share the same timestamp may
|
|
+not be sorted in the same way as they happened. Such close events
|
|
+may actually happen fairly often.
|
|
|
|
Usage:
|
|
...............
|
|
@@ -2634,8 +2651,8 @@ the transition are printed.
|
|
|
|
Usage:
|
|
...............
|
|
- transition [<number>|<file>] [nograph] [v...] [scores] [actions] [utilization]
|
|
- transition showdot [<number>|<file>]
|
|
+ transition [<number>|<index>|<file>] [nograph] [v...] [scores] [actions] [utilization]
|
|
+ transition showdot [<number>|<index>|<file>]
|
|
...............
|
|
Examples:
|
|
...............
|
|
diff --git a/shell/modules/log_patterns.py b/shell/modules/log_patterns.py
|
|
--- a/shell/modules/log_patterns.py
|
|
+++ b/shell/modules/log_patterns.py
|
|
@@ -12,34 +12,41 @@
|
|
# detail level 0 is the lowest, i.e. should match the least
|
|
# number of relevant messages
|
|
|
|
-# NB: If you modify this file, you must follow python syntax!
|
|
+# NB:
|
|
+# %% stands for whatever user input we get, for instance a
|
|
+# resource name or node name or just some regular expression
|
|
+# in the optimal case, it should be surrounded by literals
|
|
+#
|
|
+# [Note that resources may contain clone numbers!]
|
|
|
|
log_patterns = {
|
|
"resource": (
|
|
( # detail 0
|
|
- "lrmd:.*rsc:%%.*(start|stop|promote|demote|migrate)",
|
|
- "lrmd:.*RA output:.*%%.*stderr",
|
|
- "lrmd:.*WARN:.*Managed.*%%.*exited",
|
|
+ "lrmd:.*rsc:%% (start|stop|promote|demote|migrate)",
|
|
+ "lrmd:.*RA output: .%%:.*:stderr",
|
|
+ "lrmd:.*WARN: Managed %%:.*exited",
|
|
),
|
|
( # detail 1
|
|
- "lrmd:.*rsc:%%.*(probe|notify)",
|
|
- "lrmd:.*info:.*Managed.*%%.*exited",
|
|
+ "lrmd:.*rsc:%%:.*(probe|notify)",
|
|
+ "lrmd:.*info: Managed %%:.*exited",
|
|
),
|
|
),
|
|
"node": (
|
|
( # detail 0
|
|
- "%%.*Corosync.Cluster.Engine",
|
|
- "%%.*Executive.Service.RELEASE",
|
|
- "%%.*crm_shutdown:.Requesting.shutdown",
|
|
- "%%.*pcmk_shutdown:.Shutdown.complete",
|
|
- "%%.*Configuration.validated..Starting.heartbeat",
|
|
- "pengine.*Scheduling Node %%",
|
|
- "te_fence_node.*Exec.*%%",
|
|
- "stonith-ng.*log_oper.*reboot.*%%",
|
|
- "stonithd.*to STONITH.*%%",
|
|
- "stonithd.*fenced node %%",
|
|
- "pcmk_peer_update.*(lost|memb): %%",
|
|
- "crmd.*ccm_event.*(NEW|LOST) %%",
|
|
+ " %% .*Corosync.Cluster.Engine",
|
|
+ " %% .*Executive.Service.RELEASE",
|
|
+ " %% .*crm_shutdown:.Requesting.shutdown",
|
|
+ " %% .*pcmk_shutdown:.Shutdown.complete",
|
|
+ " %% .*Configuration.validated..Starting.heartbeat",
|
|
+ "pengine.*Scheduling Node %% for STONITH",
|
|
+ "crmd.* tengine_stonith_callback: .* of %% failed",
|
|
+ "stonith-ng.*log_operation:.*host '%%'",
|
|
+ "te_fence_node: Exec.*on %% ",
|
|
+ "pe_fence_node: Node %% will be fenced",
|
|
+ "stonith-ng.*remote_op_timeout:.*for %% timed",
|
|
+ "stonithd.*Succeeded.*node %%:",
|
|
+ "pcmk_peer_update.*(lost|memb): %% ",
|
|
+ "crmd.*ccm_event.*(NEW|LOST):.* %% ",
|
|
),
|
|
( # detail 1
|
|
),
|
|
diff --git a/shell/modules/report.py b/shell/modules/report.py
|
|
--- a/shell/modules/report.py
|
|
+++ b/shell/modules/report.py
|
|
@@ -589,7 +589,7 @@ class Report(Singleton):
|
|
except IOError,msg:
|
|
common_err("open %s: %s"%(fl[0],msg))
|
|
continue
|
|
- pe_l = self.get_transitions([x for x in f], keep_pe_path = True)
|
|
+ pe_l = self.list_transitions([x for x in f], future_pe = True)
|
|
if pe_l:
|
|
l.append([node,pe_l])
|
|
return l
|
|
@@ -752,12 +752,13 @@ class Report(Singleton):
|
|
for n in self.cibnode_l:
|
|
self.nodecolor[n] = self.nodecolors[i]
|
|
i = (i+1) % len(self.nodecolors)
|
|
- def get_transitions(self, msg_l = None, keep_pe_path = False):
|
|
+ def list_transitions(self, msg_l = None, future_pe = False):
|
|
'''
|
|
- Get a list of transitions.
|
|
+ List transitions by reading logs.
|
|
Empty transitions are skipped.
|
|
- Some callers need original PE file path (keep_pe_path),
|
|
- otherwise we produce the path within the report.
|
|
+ Some callers need original PE file path (future_pe),
|
|
+ otherwise we produce the path within the report and check
|
|
+ if the transition files exist.
|
|
If the caller doesn't provide the message list, then we
|
|
build it from the collected log files (self.logobj).
|
|
Otherwise, we get matches for transition patterns.
|
|
@@ -786,11 +787,18 @@ class Report(Singleton):
|
|
continue
|
|
elif num_actions == -1: # couldn't find messages
|
|
common_warn("could not find number of actions for transition (%s)" % pe_base)
|
|
- common_debug("found PE input at %s: %s" % (node, pe_file))
|
|
- if keep_pe_path:
|
|
- pe_l.append(pe_file)
|
|
+ if not future_pe:
|
|
+ pe_l_file = os.path.join(self.loc, node, "pengine", pe_base)
|
|
+ if not os.path.isfile(pe_l_file):
|
|
+ warn_once("%s in the logs, but not in the report" % pe_l_file)
|
|
+ continue
|
|
else:
|
|
- pe_l.append(os.path.join(self.loc, node, "pengine", pe_base))
|
|
+ pe_l_file = "%s:%s" % (node, pe_file)
|
|
+ if pe_l_file in pe_l:
|
|
+ common_warn("duplicate %s, replacing older PE file" % pe_l_file)
|
|
+ pe_l.remove(pe_l_file)
|
|
+ common_debug("found PE input: %s" % pe_l_file)
|
|
+ pe_l.append(pe_l_file)
|
|
return pe_l
|
|
def report_setup(self):
|
|
if not self.loc:
|
|
@@ -802,11 +810,7 @@ class Report(Singleton):
|
|
self.set_node_colors()
|
|
self.logobj = LogSyslog(self.central_log, self.log_l, \
|
|
self.from_dt, self.to_dt)
|
|
- self.peinputs_l = self.get_transitions()
|
|
- for pe_input in self.peinputs_l:
|
|
- if not os.path.isfile(pe_input):
|
|
- warn_once("%s in the logs, but not in the report" % pe_input)
|
|
- self.peinputs_l.remove(pe_input)
|
|
+ self.peinputs_l = self.list_transitions()
|
|
def prepare_source(self):
|
|
'''
|
|
Unpack a hb_report tarball.
|
|
@@ -859,7 +863,7 @@ class Report(Singleton):
|
|
if not args:
|
|
re_l = mk_re_list(patt_l,"")
|
|
else:
|
|
- re_l = mk_re_list(patt_l,r'(%s)\W' % "|".join(args))
|
|
+ re_l = mk_re_list(patt_l,r'(%s)' % "|".join(args))
|
|
return re_l
|
|
def disp(self, s):
|
|
'color output'
|
|
@@ -886,11 +890,6 @@ class Report(Singleton):
|
|
self.error("no logs found")
|
|
return
|
|
self.display_logs(self.logobj.get_matches(re_l, log_l))
|
|
- def match_args(self, cib_l, args):
|
|
- for a in args:
|
|
- a_clone = re.sub(r':.*', '', a)
|
|
- if not (a in cib_l) and not (a_clone in cib_l):
|
|
- self.warn("%s not found in report, proceeding anyway" % a)
|
|
def get_desc_line(self,fld):
|
|
try:
|
|
f = open(self.desc)
|
|
@@ -923,8 +922,9 @@ class Report(Singleton):
|
|
'''
|
|
Show all events.
|
|
'''
|
|
- all_re_l = self.build_re("resource",self.cibrsc_l) + \
|
|
- self.build_re("node",self.cibnode_l)
|
|
+ all_re_l = self.build_re("resource", self.cibrsc_l) + \
|
|
+ self.build_re("node", self.cibnode_l) + \
|
|
+ self.build_re("events", [])
|
|
if not all_re_l:
|
|
self.error("no resources or nodes found")
|
|
return False
|
|
@@ -940,6 +940,7 @@ class Report(Singleton):
|
|
te_invoke_patt = transition_patt[0].replace("%%", pe_num)
|
|
run_patt = transition_patt[1].replace("%%", pe_num)
|
|
r = None
|
|
+ msg_l.reverse()
|
|
for msg in msg_l:
|
|
r = re.search(te_invoke_patt, msg)
|
|
if r:
|
|
@@ -1009,7 +1010,6 @@ class Report(Singleton):
|
|
expanded_l += self.cibgrp_d[a]
|
|
else:
|
|
expanded_l.append(a)
|
|
- self.match_args(self.cibrsc_l,expanded_l)
|
|
rsc_re_l = self.build_re("resource",expanded_l)
|
|
if not rsc_re_l:
|
|
return False
|
|
@@ -1020,7 +1020,6 @@ class Report(Singleton):
|
|
'''
|
|
if not self.prepare_source():
|
|
return False
|
|
- self.match_args(self.cibnode_l,args)
|
|
node_re_l = self.build_re("node",args)
|
|
if not node_re_l:
|
|
return False
|
|
diff --git a/shell/modules/ui.py.in b/shell/modules/ui.py.in
|
|
--- a/shell/modules/ui.py.in
|
|
+++ b/shell/modules/ui.py.in
|
|
@@ -1877,16 +1877,16 @@ Examine Pacemaker's history: node and re
|
|
def _get_pe_byidx(self, idx):
|
|
l = crm_report.pelist()
|
|
if len(l) < abs(idx):
|
|
- common_err("pe input file for index %d not found" % (idx+1))
|
|
+ common_err("PE input file for index %d not found" % (idx+1))
|
|
return None
|
|
return l[idx]
|
|
def _get_pe_bynum(self, n):
|
|
l = crm_report.pelist([n])
|
|
if len(l) == 0:
|
|
- common_err("%s: PE file %d not found" % n)
|
|
+ common_err("PE file %d not found" % n)
|
|
return None
|
|
elif len(l) > 1:
|
|
- common_err("%s: PE file %d ambiguous" % n)
|
|
+ common_err("PE file %d ambiguous" % n)
|
|
return None
|
|
return l[0]
|
|
def transition(self,cmd,*args):
|