0440703030
- PE: Demote from Master does not clear previous errors - crmd: Prevent secondary DC fencing resulting from CIB updates that are lost due to elections - crmd: Log duplicate DC detection as a WARNING not ERROR - crmd: Bug lf#2632 - Correctly handle nodes that return faster than stonith - Core: Treat GNUTLS_E_UNEXPECTED_PACKET_LENGTH as normal termination of a TLS session - cib: Call gnutls_bye() and shutdown() when disconnecting from remote TLS connections - cib: Remove disconnected remote connections from mainloop - cib: Attempt a graceful sign-off for remote TLS connections - Core: Ensure there is sufficient space for EOS when building short-form option strings (prevents segfault) - Core: Fix variable expansion in pkg-config files - PE: Resolve memory leak reported by valgrind - PE: Fix memory leak for re-allocated resources reported by valgrind - PE: Improve the merging with template's operations - crmd: Allow nodes to fence themselves if they're the last one standing (lf#2584) - stonith: Add an API call for listing installed agents - stonith: Allow the fencing history to be queried - stonith: Ensure completed operations are recorded as such in the history - stonith: Support --quiet to display just the seconds since epoch at which a node was last shot - stonith: Serialize actions for a given device - stonith: Add missing entries to stonith_error2string() (missing OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/pacemaker?expand=0&rev=18
208 lines
8.0 KiB
Diff
208 lines
8.0 KiB
Diff
# HG changeset patch
|
|
# User Dejan Muhamedagic <dejan@hello-penguin.com>
|
|
# Date 1313413824 -7200
|
|
# Node ID 3f3c348aaaed52383f6646b08899943aec8911f4
|
|
# Parent 441f4448eba6eda1a2cf44d3d63a0db9f8d56a20
|
|
Medium: Shell: relax transition acceptance
|
|
|
|
Sometimes the logs are missing one or another transition-related
|
|
message. Try to be more forgiving then.
|
|
Also, print information about the number of actions which were
|
|
completed, skipped, etc.
|
|
|
|
diff --git a/shell/modules/report.py b/shell/modules/report.py
|
|
--- a/shell/modules/report.py
|
|
+++ b/shell/modules/report.py
|
|
@@ -320,10 +320,8 @@ def is_log(p):
|
|
return os.path.isfile(p) and os.path.getsize(p) > 0
|
|
|
|
def pe_file_in_range(pe_f, a):
|
|
- r = re.search("pe-[^-]+-([0-9]+)[.]bz2$", pe_f)
|
|
- if not r:
|
|
- return None
|
|
- if not a or (a[0] <= int(r.group(1)) <= a[1]):
|
|
+ pe_num = get_pe_num(pe_f)
|
|
+ if not a or (a[0] <= int(pe_num) <= a[1]):
|
|
return pe_f
|
|
return None
|
|
|
|
@@ -347,6 +345,12 @@ def update_loginfo(rptlog, logfile, oldp
|
|
except IOError, msg:
|
|
common_err("couldn't the update %s.info: %s" % (rptlog, msg))
|
|
|
|
+def get_pe_num(pe_file):
|
|
+ try:
|
|
+ return re.search("pe-[^-]+-([0-9]+)[.]", pe_file).group(1)
|
|
+ except:
|
|
+ return "-1"
|
|
+
|
|
# r.group(1) transition number (a different thing from file number)
|
|
# r.group(2) contains full path
|
|
# r.group(3) file number
|
|
@@ -358,6 +362,40 @@ transition_patt = (
|
|
"crmd: .* unpack_graph: Unpacked transition (%%): ([0-9]+) actions", # number of actions
|
|
)
|
|
|
|
+def run_graph_msg_actions(msg):
|
|
+ '''
|
|
+ crmd: [13667]: info: run_graph: Transition 399 (Complete=5,
|
|
+ Pending=1, Fired=1, Skipped=0, Incomplete=3,
|
|
+ Source=...
|
|
+ '''
|
|
+ d = {}
|
|
+ s = msg
|
|
+ while True:
|
|
+ r = re.search("([A-Z][a-z]+)=([0-9]+)", s)
|
|
+ if not r:
|
|
+ return d
|
|
+ d[r.group(1)] = int(r.group(2))
|
|
+ s = s[r.end():]
|
|
+def transition_actions(msg_l, te_invoke_msg, pe_file):
|
|
+ '''
|
|
+ Get the number of actions for the transition.
|
|
+ '''
|
|
+ # check if there were any actions in this transition
|
|
+ pe_num = get_pe_num(pe_file)
|
|
+ te_invoke_patt = transition_patt[0].replace("%%", pe_num)
|
|
+ run_patt = transition_patt[1].replace("%%", pe_num)
|
|
+ r = re.search(te_invoke_patt, te_invoke_msg)
|
|
+ trans_num = r.group(1)
|
|
+ unpack_patt = transition_patt[2].replace("%%", trans_num)
|
|
+ for msg in msg_l:
|
|
+ try:
|
|
+ return int(re.search(unpack_patt, msg).group(2))
|
|
+ except:
|
|
+ if re.search(run_patt, msg):
|
|
+ act_d = run_graph_msg_actions(msg)
|
|
+ return sum(act_d.values())
|
|
+ return -1
|
|
+
|
|
class Report(Singleton):
|
|
'''
|
|
A hb_report class.
|
|
@@ -396,8 +434,7 @@ class Report(Singleton):
|
|
def node_list(self):
|
|
return self.cibnode_l
|
|
def peinputs_list(self):
|
|
- return [re.search("pe-[^-]+-([0-9]+)[.]bz2$", x).group(1)
|
|
- for x in self.peinputs_l]
|
|
+ return [get_pe_num(x) for x in self.peinputs_l]
|
|
def unpack_report(self, tarball):
|
|
'''
|
|
Unpack hb_report tarball.
|
|
@@ -712,8 +749,6 @@ class Report(Singleton):
|
|
'''
|
|
Get a list of transitions.
|
|
Empty transitions are skipped.
|
|
- We use the unpack_graph message to see the number of
|
|
- actions.
|
|
Some callers need original PE file path (keep_pe_path),
|
|
otherwise we produce the path within the report.
|
|
If the caller doesn't provide the message list, then we
|
|
@@ -738,19 +773,12 @@ class Report(Singleton):
|
|
node = msg_a[3]
|
|
pe_file = msg_a[-1]
|
|
pe_base = os.path.basename(pe_file)
|
|
- # check if there were any actions in this transition
|
|
- r = re.search(trans_re_l[0], msg)
|
|
- trans_num = r.group(1)
|
|
- unpack_patt = transition_patt[2].replace("%%", trans_num)
|
|
- num_actions = 0
|
|
- for t in msg_l:
|
|
- try:
|
|
- num_actions = int(re.search(unpack_patt, t).group(2))
|
|
- break
|
|
- except: pass
|
|
+ num_actions = transition_actions(msg_l, msg, pe_file)
|
|
if num_actions == 0: # empty transition
|
|
- common_debug("skipping empty transition %s (%s)" % (trans_num, pe_base))
|
|
+ common_debug("skipping empty transition (%s)" % pe_base)
|
|
continue
|
|
+ elif num_actions == -1: # couldn't find messages
|
|
+ common_warn("could not find number of actions for transition (%s)" % pe_base)
|
|
common_debug("found PE input at %s: %s" % (node, pe_file))
|
|
if keep_pe_path:
|
|
pe_l.append(pe_file)
|
|
@@ -894,6 +922,34 @@ class Report(Singleton):
|
|
self.error("no resources or nodes found")
|
|
return False
|
|
self.show_logs(re_l = all_re_l)
|
|
+ def get_transition_msgs(self, pe_file, msg_l = []):
|
|
+ if not msg_l:
|
|
+ trans_re_l = [x.replace("%%", "[0-9]+") for x in transition_patt]
|
|
+ msg_l = self.logobj.get_matches(trans_re_l)
|
|
+ te_invoke_msg = ""
|
|
+ run_msg = ""
|
|
+ unpack_msg = ""
|
|
+ pe_num = get_pe_num(pe_file)
|
|
+ te_invoke_patt = transition_patt[0].replace("%%", pe_num)
|
|
+ run_patt = transition_patt[1].replace("%%", pe_num)
|
|
+ r = None
|
|
+ for msg in msg_l:
|
|
+ r = re.search(te_invoke_patt, msg)
|
|
+ if r:
|
|
+ te_invoke_msg = msg
|
|
+ break
|
|
+ if not r:
|
|
+ return ["", "", ""]
|
|
+ trans_num = r.group(1)
|
|
+ unpack_patt = transition_patt[2].replace("%%", trans_num)
|
|
+ for msg in msg_l:
|
|
+ if re.search(run_patt, msg):
|
|
+ run_msg = msg
|
|
+ elif re.search(unpack_patt, msg):
|
|
+ unpack_msg = msg
|
|
+ if run_msg and unpack_msg:
|
|
+ break
|
|
+ return [unpack_msg, te_invoke_msg, run_msg]
|
|
def show_transition_log(self, pe_file):
|
|
'''
|
|
Search for events within the given transition.
|
|
@@ -901,28 +957,34 @@ class Report(Singleton):
|
|
if not self.prepare_source():
|
|
return False
|
|
pe_base = os.path.basename(pe_file)
|
|
- r = re.search("pe-[^-]+-([0-9]+)[.]", pe_base)
|
|
- pe_num = r.group(1)
|
|
- trans_re_l = [x.replace("%%",pe_num) for x in transition_patt]
|
|
- trans_start = self.logobj.search_logs(self.log_l, trans_re_l[0])
|
|
- trans_end = self.logobj.search_logs(self.log_l, trans_re_l[1])
|
|
- if not trans_start:
|
|
+ pe_num = get_pe_num(pe_base)
|
|
+ unpack_msg, te_invoke_msg, run_msg = self.get_transition_msgs(pe_file)
|
|
+ if not te_invoke_msg:
|
|
common_warn("start of transition %s not found in logs" % pe_base)
|
|
return False
|
|
- if not trans_end:
|
|
+ if not run_msg:
|
|
common_warn("end of transition %s not found in logs (transition not complete yet?)" % pe_base)
|
|
return False
|
|
- common_debug("transition start: %s" % trans_start[0])
|
|
- common_debug("transition end: %s" % trans_end[0])
|
|
- start_ts = syslog_ts(trans_start[0])
|
|
- end_ts = syslog_ts(trans_end[0])
|
|
+ common_debug("transition start: %s" % te_invoke_msg)
|
|
+ common_debug("transition end: %s" % run_msg)
|
|
+ start_ts = syslog_ts(te_invoke_msg)
|
|
+ end_ts = syslog_ts(run_msg)
|
|
if not start_ts or not end_ts:
|
|
self.warn("strange, no timestamps found")
|
|
return False
|
|
- # limit the log scope temporarily
|
|
+ act_d = run_graph_msg_actions(run_msg)
|
|
+ total = sum(act_d.values())
|
|
+ s = ""
|
|
+ for a in act_d:
|
|
+ if not act_d[a]:
|
|
+ continue
|
|
+ s = "%s %s=%d" % (s, a, act_d[a])
|
|
+ common_info("transition %s %d actions: %s" %
|
|
+ (pe_file.replace(self.loc+"/",""), total, s))
|
|
common_info("logs for transition %s (%s-%s)" %
|
|
(pe_file.replace(self.loc+"/",""), \
|
|
shorttime(start_ts), shorttime(end_ts)))
|
|
+ # limit the log scope temporarily
|
|
self.logobj.set_log_timeframe(start_ts, end_ts)
|
|
self.events()
|
|
self.logobj.set_log_timeframe(self.from_dt, self.to_dt)
|