pacemaker/crm_history_7_3f3c348aaaed.patch

# HG changeset patch
# User Dejan Muhamedagic <dejan@hello-penguin.com>
# Date 1313413824 -7200
# Node ID 3f3c348aaaed52383f6646b08899943aec8911f4
# Parent  441f4448eba6eda1a2cf44d3d63a0db9f8d56a20
Medium: Shell: relax transition acceptance

Sometimes the logs are missing one or another transition-related
message, so try to be more forgiving in that case.
Also, print the number of actions which were completed,
skipped, etc.

diff --git a/shell/modules/report.py b/shell/modules/report.py
--- a/shell/modules/report.py
+++ b/shell/modules/report.py
@@ -320,10 +320,8 @@ def is_log(p):
     return os.path.isfile(p) and os.path.getsize(p) > 0
 
 def pe_file_in_range(pe_f, a):
-    r = re.search("pe-[^-]+-([0-9]+)[.]bz2$", pe_f)
-    if not r:
-        return None
-    if not a or (a[0] <= int(r.group(1)) <= a[1]):
+    pe_num = get_pe_num(pe_f)
+    if not a or (a[0] <= int(pe_num) <= a[1]):
         return pe_f
     return None
 
@@ -347,6 +345,12 @@ def update_loginfo(rptlog, logfile, oldp
     except IOError, msg:
         common_err("couldn't the update %s.info: %s" % (rptlog, msg))
 
+def get_pe_num(pe_file):
+    try:
+        return re.search("pe-[^-]+-([0-9]+)[.]", pe_file).group(1)
+    except:
+        return "-1"
+
 # r.group(1) transition number (a different thing from file number)
 # r.group(2) contains full path
 # r.group(3) file number
@@ -358,6 +362,40 @@ transition_patt = (
 	"crmd: .* unpack_graph: Unpacked transition (%%): ([0-9]+) actions", # number of actions
 )
 
+def run_graph_msg_actions(msg):
+    '''
+    crmd: [13667]: info: run_graph: Transition 399 (Complete=5,
+    Pending=1, Fired=1, Skipped=0, Incomplete=3,
+    Source=...
+    '''
+    d = {}
+    s = msg
+    while True:
+        r = re.search("([A-Z][a-z]+)=([0-9]+)", s)
+        if not r:
+            return d
+        d[r.group(1)] = int(r.group(2))
+        s = s[r.end():]
+def transition_actions(msg_l, te_invoke_msg, pe_file):
+    '''
+    Get the number of actions for the transition.
+    '''
+    # check if there were any actions in this transition
+    pe_num = get_pe_num(pe_file)
+    te_invoke_patt = transition_patt[0].replace("%%", pe_num)
+    run_patt = transition_patt[1].replace("%%", pe_num)
+    r = re.search(te_invoke_patt, te_invoke_msg)
+    trans_num = r.group(1)
+    unpack_patt = transition_patt[2].replace("%%", trans_num)
+    for msg in msg_l:
+        try:
+            return int(re.search(unpack_patt, msg).group(2))
+        except:
+            if re.search(run_patt, msg):
+                act_d = run_graph_msg_actions(msg)
+                return sum(act_d.values())
+    return -1
+
 class Report(Singleton):
     '''
     A hb_report class.
@@ -396,8 +434,7 @@ class Report(Singleton):
     def node_list(self):
         return self.cibnode_l
     def peinputs_list(self):
-        return [re.search("pe-[^-]+-([0-9]+)[.]bz2$", x).group(1)
-                for x in self.peinputs_l]
+        return [get_pe_num(x) for x in self.peinputs_l]
     def unpack_report(self, tarball):
         '''
         Unpack hb_report tarball.
@@ -712,8 +749,6 @@ class Report(Singleton):
         '''
         Get a list of transitions.
         Empty transitions are skipped.
-        We use the unpack_graph message to see the number of
-        actions.
         Some callers need original PE file path (keep_pe_path),
         otherwise we produce the path within the report.
         If the caller doesn't provide the message list, then we
@@ -738,19 +773,12 @@ class Report(Singleton):
             node = msg_a[3]
             pe_file = msg_a[-1]
             pe_base = os.path.basename(pe_file)
-            # check if there were any actions in this transition
-            r = re.search(trans_re_l[0], msg)
-            trans_num = r.group(1)
-            unpack_patt = transition_patt[2].replace("%%", trans_num)
-            num_actions = 0
-            for t in msg_l:
-                try:
-                    num_actions = int(re.search(unpack_patt, t).group(2))
-                    break
-                except: pass
+            num_actions = transition_actions(msg_l, msg, pe_file)
             if num_actions == 0: # empty transition
-                common_debug("skipping empty transition %s (%s)" % (trans_num, pe_base))
+                common_debug("skipping empty transition (%s)" % pe_base)
                 continue
+            elif num_actions == -1: # couldn't find messages
+                common_warn("could not find number of actions for transition (%s)" % pe_base)
             common_debug("found PE input at %s: %s" % (node, pe_file))
             if keep_pe_path:
                 pe_l.append(pe_file)
@@ -894,6 +922,34 @@ class Report(Singleton):
             self.error("no resources or nodes found")
             return False
         self.show_logs(re_l = all_re_l)
+    def get_transition_msgs(self, pe_file, msg_l = []):
+        if not msg_l:
+            trans_re_l = [x.replace("%%", "[0-9]+") for x in transition_patt]
+            msg_l = self.logobj.get_matches(trans_re_l)
+        te_invoke_msg = ""
+        run_msg = ""
+        unpack_msg = ""
+        pe_num = get_pe_num(pe_file)
+        te_invoke_patt = transition_patt[0].replace("%%", pe_num)
+        run_patt = transition_patt[1].replace("%%", pe_num)
+        r = None
+        for msg in msg_l:
+            r = re.search(te_invoke_patt, msg)
+            if r:
+                te_invoke_msg = msg
+                break
+        if not r:
+            return ["", "", ""]
+        trans_num = r.group(1)
+        unpack_patt = transition_patt[2].replace("%%", trans_num)
+        for msg in msg_l:
+            if re.search(run_patt, msg):
+                run_msg = msg
+            elif re.search(unpack_patt, msg):
+                unpack_msg = msg
+            if run_msg and unpack_msg:
+                break
+        return [unpack_msg, te_invoke_msg, run_msg]
     def show_transition_log(self, pe_file):
         '''
         Search for events within the given transition.
@@ -901,28 +957,34 @@ class Report(Singleton):
         if not self.prepare_source():
             return False
         pe_base = os.path.basename(pe_file)
-        r = re.search("pe-[^-]+-([0-9]+)[.]", pe_base)
-        pe_num = r.group(1)
-        trans_re_l = [x.replace("%%",pe_num) for x in transition_patt]
-        trans_start = self.logobj.search_logs(self.log_l, trans_re_l[0])
-        trans_end = self.logobj.search_logs(self.log_l, trans_re_l[1])
-        if not trans_start:
+        pe_num = get_pe_num(pe_base)
+        unpack_msg, te_invoke_msg, run_msg = self.get_transition_msgs(pe_file)
+        if not te_invoke_msg:
             common_warn("start of transition %s not found in logs" % pe_base)
             return False
-        if not trans_end:
+        if not run_msg:
             common_warn("end of transition %s not found in logs (transition not complete yet?)" % pe_base)
             return False
-        common_debug("transition start: %s" % trans_start[0])
-        common_debug("transition end: %s" % trans_end[0])
-        start_ts = syslog_ts(trans_start[0])
-        end_ts = syslog_ts(trans_end[0])
+        common_debug("transition start: %s" % te_invoke_msg)
+        common_debug("transition end: %s" % run_msg)
+        start_ts = syslog_ts(te_invoke_msg)
+        end_ts = syslog_ts(run_msg)
         if not start_ts or not end_ts:
             self.warn("strange, no timestamps found")
             return False
-        # limit the log scope temporarily
+        act_d = run_graph_msg_actions(run_msg)
+        total = sum(act_d.values())
+        s = ""
+        for a in act_d:
+            if not act_d[a]:
+                continue
+            s = "%s %s=%d" % (s, a, act_d[a])
+        common_info("transition %s %d actions: %s" %
+            (pe_file.replace(self.loc+"/",""), total, s))
         common_info("logs for transition %s (%s-%s)" %
             (pe_file.replace(self.loc+"/",""), \
             shorttime(start_ts), shorttime(end_ts)))
+        # limit the log scope temporarily
         self.logobj.set_log_timeframe(start_ts, end_ts)
         self.events()
         self.logobj.set_log_timeframe(self.from_dt, self.to_dt)
- Upgrade to 1.1.6.
- PE: Demote from Master does not clear previous errors
- crmd: Prevent secondary DC fencing resulting from CIB updates that are lost due to elections
- crmd: Log duplicate DC detection as a WARNING not ERROR
- crmd: Bug lf#2632 - Correctly handle nodes that return faster than stonith
- Core: Treat GNUTLS_E_UNEXPECTED_PACKET_LENGTH as normal termination of a TLS session
- cib: Call gnutls_bye() and shutdown() when disconnecting from remote TLS connections
- cib: Remove disconnected remote connections from mainloop
- cib: Attempt a graceful sign-off for remote TLS connections
- Core: Ensure there is sufficient space for EOS when building short-form option strings (prevents segfault)
- Core: Fix variable expansion in pkg-config files
- PE: Resolve memory leak reported by valgrind
- PE: Fix memory leak for re-allocated resources reported by valgrind
- PE: Improve the merging with template's operations
- crmd: Allow nodes to fence themselves if they're the last one standing (lf#2584)
- stonith: Add an API call for listing installed agents
- stonith: Allow the fencing history to be queried
- stonith: Ensure completed operations are recorded as such in the history
- stonith: Support --quiet to display just the seconds since epoch at which a node was last shot
- stonith: Serialize actions for a given device
- stonith: Add missing entries to stonith_error2string() (missing

OBS-URL: https://build.opensuse.org/package/show/network:ha-clustering:Factory/pacemaker?expand=0&rev=18
2011-09-20 14:36:23 +00:00

			`# HG changeset patch`
			`# User Dejan Muhamedagic <dejan@hello-penguin.com>`
			`# Date 1313413824 -7200`
			`# Node ID 3f3c348aaaed52383f6646b08899943aec8911f4`
			`# Parent 441f4448eba6eda1a2cf44d3d63a0db9f8d56a20`
			`Medium: Shell: relax transition acceptance`

			`Sometimes logs are missing one or another transition related`
			`message. Try to be more forgiving then.`
			`Also, print information about number of actions which were`
			`completed, skipped, etc.`

			`diff --git a/shell/modules/report.py b/shell/modules/report.py`
			`--- a/shell/modules/report.py`
			`+++ b/shell/modules/report.py`
			`@@ -320,10 +320,8 @@ def is_log(p):`
			`return os.path.isfile(p) and os.path.getsize(p) > 0`

			`def pe_file_in_range(pe_f, a):`
			`- r = re.search("pe-[^-]+-([0-9]+)[.]bz2$", pe_f)`
			`- if not r:`
			`- return None`
			`- if not a or (a[0] <= int(r.group(1)) <= a[1]):`
			`+ pe_num = get_pe_num(pe_f)`
			`+ if not a or (a[0] <= int(pe_num) <= a[1]):`
			`return pe_f`
			`return None`

			`@@ -347,6 +345,12 @@ def update_loginfo(rptlog, logfile, oldp`
			`except IOError, msg:`
			`common_err("couldn't the update %s.info: %s" % (rptlog, msg))`

			`+def get_pe_num(pe_file):`
			`+ try:`
			`+ return re.search("pe-[^-]+-([0-9]+)[.]", pe_file).group(1)`
			`+ except:`
			`+ return "-1"`
			`+`
			`# r.group(1) transition number (a different thing from file number)`
			`# r.group(2) contains full path`
			`# r.group(3) file number`
			`@@ -358,6 +362,40 @@ transition_patt = (`
			`"crmd: .* unpack_graph: Unpacked transition (%%): ([0-9]+) actions", # number of actions`
			`)`

			`+def run_graph_msg_actions(msg):`
			`+ '''`
			`+ crmd: [13667]: info: run_graph: Transition 399 (Complete=5,`
			`+ Pending=1, Fired=1, Skipped=0, Incomplete=3,`
			`+ Source=...`
			`+ '''`
			`+ d = {}`
			`+ s = msg`
			`+ while True:`
			`+ r = re.search("([A-Z][a-z]+)=([0-9]+)", s)`
			`+ if not r:`
			`+ return d`
			`+ d[r.group(1)] = int(r.group(2))`
			`+ s = s[r.end():]`
			`+def transition_actions(msg_l, te_invoke_msg, pe_file):`
			`+ '''`
			`+ Get the number of actions for the transition.`
			`+ '''`
			`+ # check if there were any actions in this transition`
			`+ pe_num = get_pe_num(pe_file)`
			`+ te_invoke_patt = transition_patt[0].replace("%%", pe_num)`
			`+ run_patt = transition_patt[1].replace("%%", pe_num)`
			`+ r = re.search(te_invoke_patt, te_invoke_msg)`
			`+ trans_num = r.group(1)`
			`+ unpack_patt = transition_patt[2].replace("%%", trans_num)`
			`+ for msg in msg_l:`
			`+ try:`
			`+ return int(re.search(unpack_patt, msg).group(2))`
			`+ except:`
			`+ if re.search(run_patt, msg):`
			`+ act_d = run_graph_msg_actions(msg)`
			`+ return sum(act_d.values())`
			`+ return -1`
			`+`
			`class Report(Singleton):`
			`'''`
			`A hb_report class.`
			`@@ -396,8 +434,7 @@ class Report(Singleton):`
			`def node_list(self):`
			`return self.cibnode_l`
			`def peinputs_list(self):`
			`- return [re.search("pe-[^-]+-([0-9]+)[.]bz2$", x).group(1)`
			`- for x in self.peinputs_l]`
			`+ return [get_pe_num(x) for x in self.peinputs_l]`
			`def unpack_report(self, tarball):`
			`'''`
			`Unpack hb_report tarball.`
			`@@ -712,8 +749,6 @@ class Report(Singleton):`
			`'''`
			`Get a list of transitions.`
			`Empty transitions are skipped.`
			`- We use the unpack_graph message to see the number of`
			`- actions.`
			`Some callers need original PE file path (keep_pe_path),`
			`otherwise we produce the path within the report.`
			`If the caller doesn't provide the message list, then we`
			`@@ -738,19 +773,12 @@ class Report(Singleton):`
			`node = msg_a[3]`
			`pe_file = msg_a[-1]`
			`pe_base = os.path.basename(pe_file)`
			`- # check if there were any actions in this transition`
			`- r = re.search(trans_re_l[0], msg)`
			`- trans_num = r.group(1)`
			`- unpack_patt = transition_patt[2].replace("%%", trans_num)`
			`- num_actions = 0`
			`- for t in msg_l:`
			`- try:`
			`- num_actions = int(re.search(unpack_patt, t).group(2))`
			`- break`
			`- except: pass`
			`+ num_actions = transition_actions(msg_l, msg, pe_file)`
			`if num_actions == 0: # empty transition`
			`- common_debug("skipping empty transition %s (%s)" % (trans_num, pe_base))`
			`+ common_debug("skipping empty transition (%s)" % pe_base)`
			`continue`
			`+ elif num_actions == -1: # couldn't find messages`
			`+ common_warn("could not find number of actions for transition (%s)" % pe_base)`
			`common_debug("found PE input at %s: %s" % (node, pe_file))`
			`if keep_pe_path:`
			`pe_l.append(pe_file)`
			`@@ -894,6 +922,34 @@ class Report(Singleton):`
			`self.error("no resources or nodes found")`
			`return False`
			`self.show_logs(re_l = all_re_l)`
			`+ def get_transition_msgs(self, pe_file, msg_l = []):`
			`+ if not msg_l:`
			`+ trans_re_l = [x.replace("%%", "[0-9]+") for x in transition_patt]`
			`+ msg_l = self.logobj.get_matches(trans_re_l)`
			`+ te_invoke_msg = ""`
			`+ run_msg = ""`
			`+ unpack_msg = ""`
			`+ pe_num = get_pe_num(pe_file)`
			`+ te_invoke_patt = transition_patt[0].replace("%%", pe_num)`
			`+ run_patt = transition_patt[1].replace("%%", pe_num)`
			`+ r = None`
			`+ for msg in msg_l:`
			`+ r = re.search(te_invoke_patt, msg)`
			`+ if r:`
			`+ te_invoke_msg = msg`
			`+ break`
			`+ if not r:`
			`+ return ["", "", ""]`
			`+ trans_num = r.group(1)`
			`+ unpack_patt = transition_patt[2].replace("%%", trans_num)`
			`+ for msg in msg_l:`
			`+ if re.search(run_patt, msg):`
			`+ run_msg = msg`
			`+ elif re.search(unpack_patt, msg):`
			`+ unpack_msg = msg`
			`+ if run_msg and unpack_msg:`
			`+ break`
			`+ return [unpack_msg, te_invoke_msg, run_msg]`
			`def show_transition_log(self, pe_file):`
			`'''`
			`Search for events within the given transition.`
			`@@ -901,28 +957,34 @@ class Report(Singleton):`
			`if not self.prepare_source():`
			`return False`
			`pe_base = os.path.basename(pe_file)`
			`- r = re.search("pe-[^-]+-([0-9]+)[.]", pe_base)`
			`- pe_num = r.group(1)`
			`- trans_re_l = [x.replace("%%",pe_num) for x in transition_patt]`
			`- trans_start = self.logobj.search_logs(self.log_l, trans_re_l[0])`
			`- trans_end = self.logobj.search_logs(self.log_l, trans_re_l[1])`
			`- if not trans_start:`
			`+ pe_num = get_pe_num(pe_base)`
			`+ unpack_msg, te_invoke_msg, run_msg = self.get_transition_msgs(pe_file)`
			`+ if not te_invoke_msg:`
			`common_warn("start of transition %s not found in logs" % pe_base)`
			`return False`
			`- if not trans_end:`
			`+ if not run_msg:`
			`common_warn("end of transition %s not found in logs (transition not complete yet?)" % pe_base)`
			`return False`
			`- common_debug("transition start: %s" % trans_start[0])`
			`- common_debug("transition end: %s" % trans_end[0])`
			`- start_ts = syslog_ts(trans_start[0])`
			`- end_ts = syslog_ts(trans_end[0])`
			`+ common_debug("transition start: %s" % te_invoke_msg)`
			`+ common_debug("transition end: %s" % run_msg)`
			`+ start_ts = syslog_ts(te_invoke_msg)`
			`+ end_ts = syslog_ts(run_msg)`
			`if not start_ts or not end_ts:`
			`self.warn("strange, no timestamps found")`
			`return False`
			`- # limit the log scope temporarily`
			`+ act_d = run_graph_msg_actions(run_msg)`
			`+ total = sum(act_d.values())`
			`+ s = ""`
			`+ for a in act_d:`
			`+ if not act_d[a]:`
			`+ continue`
			`+ s = "%s %s=%d" % (s, a, act_d[a])`
			`+ common_info("transition %s %d actions: %s" %`
			`+ (pe_file.replace(self.loc+"/",""), total, s))`
			`common_info("logs for transition %s (%s-%s)" %`
			`(pe_file.replace(self.loc+"/",""), \`
			`shorttime(start_ts), shorttime(end_ts)))`
			`+ # limit the log scope temporarily`
			`self.logobj.set_log_timeframe(start_ts, end_ts)`
			`self.events()`
			`self.logobj.set_log_timeframe(self.from_dt, self.to_dt)`