208 lines
8.0 KiB
Diff
208 lines
8.0 KiB
Diff
|
# HG changeset patch
|
||
|
# User Dejan Muhamedagic <dejan@hello-penguin.com>
|
||
|
# Date 1313413824 -7200
|
||
|
# Node ID 3f3c348aaaed52383f6646b08899943aec8911f4
|
||
|
# Parent 441f4448eba6eda1a2cf44d3d63a0db9f8d56a20
|
||
|
Medium: Shell: relax transition acceptance
|
||
|
|
||
|
Sometimes logs are missing one or another transition-related
|
||
|
message. Be more forgiving in that case.
|
||
|
Also, print information about the number of actions which were
|
||
|
completed, skipped, etc.
|
||
|
|
||
|
diff --git a/shell/modules/report.py b/shell/modules/report.py
|
||
|
--- a/shell/modules/report.py
|
||
|
+++ b/shell/modules/report.py
|
||
|
@@ -320,10 +320,8 @@ def is_log(p):
|
||
|
return os.path.isfile(p) and os.path.getsize(p) > 0
|
||
|
|
||
|
def pe_file_in_range(pe_f, a):
|
||
|
- r = re.search("pe-[^-]+-([0-9]+)[.]bz2$", pe_f)
|
||
|
- if not r:
|
||
|
- return None
|
||
|
- if not a or (a[0] <= int(r.group(1)) <= a[1]):
|
||
|
+ pe_num = get_pe_num(pe_f)
|
||
|
+ if not a or (a[0] <= int(pe_num) <= a[1]):
|
||
|
return pe_f
|
||
|
return None
|
||
|
|
||
|
@@ -347,6 +345,12 @@ def update_loginfo(rptlog, logfile, oldp
|
||
|
except IOError, msg:
|
||
|
common_err("couldn't the update %s.info: %s" % (rptlog, msg))
|
||
|
|
||
|
+def get_pe_num(pe_file):
|
||
|
+ try:
|
||
|
+ return re.search("pe-[^-]+-([0-9]+)[.]", pe_file).group(1)
|
||
|
+ except:
|
||
|
+ return "-1"
|
||
|
+
|
||
|
# r.group(1) transition number (a different thing from file number)
|
||
|
# r.group(2) contains full path
|
||
|
# r.group(3) file number
|
||
|
@@ -358,6 +362,40 @@ transition_patt = (
|
||
|
"crmd: .* unpack_graph: Unpacked transition (%%): ([0-9]+) actions", # number of actions
|
||
|
)
|
||
|
|
||
|
+def run_graph_msg_actions(msg):
|
||
|
+ '''
|
||
|
+ crmd: [13667]: info: run_graph: Transition 399 (Complete=5,
|
||
|
+ Pending=1, Fired=1, Skipped=0, Incomplete=3,
|
||
|
+ Source=...
|
||
|
+ '''
|
||
|
+ d = {}
|
||
|
+ s = msg
|
||
|
+ while True:
|
||
|
+ r = re.search("([A-Z][a-z]+)=([0-9]+)", s)
|
||
|
+ if not r:
|
||
|
+ return d
|
||
|
+ d[r.group(1)] = int(r.group(2))
|
||
|
+ s = s[r.end():]
|
||
|
+def transition_actions(msg_l, te_invoke_msg, pe_file):
|
||
|
+ '''
|
||
|
+ Get the number of actions for the transition.
|
||
|
+ '''
|
||
|
+ # check if there were any actions in this transition
|
||
|
+ pe_num = get_pe_num(pe_file)
|
||
|
+ te_invoke_patt = transition_patt[0].replace("%%", pe_num)
|
||
|
+ run_patt = transition_patt[1].replace("%%", pe_num)
|
||
|
+ r = re.search(te_invoke_patt, te_invoke_msg)
|
||
|
+ trans_num = r.group(1)
|
||
|
+ unpack_patt = transition_patt[2].replace("%%", trans_num)
|
||
|
+ for msg in msg_l:
|
||
|
+ try:
|
||
|
+ return int(re.search(unpack_patt, msg).group(2))
|
||
|
+ except:
|
||
|
+ if re.search(run_patt, msg):
|
||
|
+ act_d = run_graph_msg_actions(msg)
|
||
|
+ return sum(act_d.values())
|
||
|
+ return -1
|
||
|
+
|
||
|
class Report(Singleton):
|
||
|
'''
|
||
|
A hb_report class.
|
||
|
@@ -396,8 +434,7 @@ class Report(Singleton):
|
||
|
def node_list(self):
|
||
|
return self.cibnode_l
|
||
|
def peinputs_list(self):
|
||
|
- return [re.search("pe-[^-]+-([0-9]+)[.]bz2$", x).group(1)
|
||
|
- for x in self.peinputs_l]
|
||
|
+ return [get_pe_num(x) for x in self.peinputs_l]
|
||
|
def unpack_report(self, tarball):
|
||
|
'''
|
||
|
Unpack hb_report tarball.
|
||
|
@@ -712,8 +749,6 @@ class Report(Singleton):
|
||
|
'''
|
||
|
Get a list of transitions.
|
||
|
Empty transitions are skipped.
|
||
|
- We use the unpack_graph message to see the number of
|
||
|
- actions.
|
||
|
Some callers need original PE file path (keep_pe_path),
|
||
|
otherwise we produce the path within the report.
|
||
|
If the caller doesn't provide the message list, then we
|
||
|
@@ -738,19 +773,12 @@ class Report(Singleton):
|
||
|
node = msg_a[3]
|
||
|
pe_file = msg_a[-1]
|
||
|
pe_base = os.path.basename(pe_file)
|
||
|
- # check if there were any actions in this transition
|
||
|
- r = re.search(trans_re_l[0], msg)
|
||
|
- trans_num = r.group(1)
|
||
|
- unpack_patt = transition_patt[2].replace("%%", trans_num)
|
||
|
- num_actions = 0
|
||
|
- for t in msg_l:
|
||
|
- try:
|
||
|
- num_actions = int(re.search(unpack_patt, t).group(2))
|
||
|
- break
|
||
|
- except: pass
|
||
|
+ num_actions = transition_actions(msg_l, msg, pe_file)
|
||
|
if num_actions == 0: # empty transition
|
||
|
- common_debug("skipping empty transition %s (%s)" % (trans_num, pe_base))
|
||
|
+ common_debug("skipping empty transition (%s)" % pe_base)
|
||
|
continue
|
||
|
+ elif num_actions == -1: # couldn't find messages
|
||
|
+ common_warn("could not find number of actions for transition (%s)" % pe_base)
|
||
|
common_debug("found PE input at %s: %s" % (node, pe_file))
|
||
|
if keep_pe_path:
|
||
|
pe_l.append(pe_file)
|
||
|
@@ -894,6 +922,34 @@ class Report(Singleton):
|
||
|
self.error("no resources or nodes found")
|
||
|
return False
|
||
|
self.show_logs(re_l = all_re_l)
|
||
|
+ def get_transition_msgs(self, pe_file, msg_l = []):
|
||
|
+ if not msg_l:
|
||
|
+ trans_re_l = [x.replace("%%", "[0-9]+") for x in transition_patt]
|
||
|
+ msg_l = self.logobj.get_matches(trans_re_l)
|
||
|
+ te_invoke_msg = ""
|
||
|
+ run_msg = ""
|
||
|
+ unpack_msg = ""
|
||
|
+ pe_num = get_pe_num(pe_file)
|
||
|
+ te_invoke_patt = transition_patt[0].replace("%%", pe_num)
|
||
|
+ run_patt = transition_patt[1].replace("%%", pe_num)
|
||
|
+ r = None
|
||
|
+ for msg in msg_l:
|
||
|
+ r = re.search(te_invoke_patt, msg)
|
||
|
+ if r:
|
||
|
+ te_invoke_msg = msg
|
||
|
+ break
|
||
|
+ if not r:
|
||
|
+ return ["", "", ""]
|
||
|
+ trans_num = r.group(1)
|
||
|
+ unpack_patt = transition_patt[2].replace("%%", trans_num)
|
||
|
+ for msg in msg_l:
|
||
|
+ if re.search(run_patt, msg):
|
||
|
+ run_msg = msg
|
||
|
+ elif re.search(unpack_patt, msg):
|
||
|
+ unpack_msg = msg
|
||
|
+ if run_msg and unpack_msg:
|
||
|
+ break
|
||
|
+ return [unpack_msg, te_invoke_msg, run_msg]
|
||
|
def show_transition_log(self, pe_file):
|
||
|
'''
|
||
|
Search for events within the given transition.
|
||
|
@@ -901,28 +957,34 @@ class Report(Singleton):
|
||
|
if not self.prepare_source():
|
||
|
return False
|
||
|
pe_base = os.path.basename(pe_file)
|
||
|
- r = re.search("pe-[^-]+-([0-9]+)[.]", pe_base)
|
||
|
- pe_num = r.group(1)
|
||
|
- trans_re_l = [x.replace("%%",pe_num) for x in transition_patt]
|
||
|
- trans_start = self.logobj.search_logs(self.log_l, trans_re_l[0])
|
||
|
- trans_end = self.logobj.search_logs(self.log_l, trans_re_l[1])
|
||
|
- if not trans_start:
|
||
|
+ pe_num = get_pe_num(pe_base)
|
||
|
+ unpack_msg, te_invoke_msg, run_msg = self.get_transition_msgs(pe_file)
|
||
|
+ if not te_invoke_msg:
|
||
|
common_warn("start of transition %s not found in logs" % pe_base)
|
||
|
return False
|
||
|
- if not trans_end:
|
||
|
+ if not run_msg:
|
||
|
common_warn("end of transition %s not found in logs (transition not complete yet?)" % pe_base)
|
||
|
return False
|
||
|
- common_debug("transition start: %s" % trans_start[0])
|
||
|
- common_debug("transition end: %s" % trans_end[0])
|
||
|
- start_ts = syslog_ts(trans_start[0])
|
||
|
- end_ts = syslog_ts(trans_end[0])
|
||
|
+ common_debug("transition start: %s" % te_invoke_msg)
|
||
|
+ common_debug("transition end: %s" % run_msg)
|
||
|
+ start_ts = syslog_ts(te_invoke_msg)
|
||
|
+ end_ts = syslog_ts(run_msg)
|
||
|
if not start_ts or not end_ts:
|
||
|
self.warn("strange, no timestamps found")
|
||
|
return False
|
||
|
- # limit the log scope temporarily
|
||
|
+ act_d = run_graph_msg_actions(run_msg)
|
||
|
+ total = sum(act_d.values())
|
||
|
+ s = ""
|
||
|
+ for a in act_d:
|
||
|
+ if not act_d[a]:
|
||
|
+ continue
|
||
|
+ s = "%s %s=%d" % (s, a, act_d[a])
|
||
|
+ common_info("transition %s %d actions: %s" %
|
||
|
+ (pe_file.replace(self.loc+"/",""), total, s))
|
||
|
common_info("logs for transition %s (%s-%s)" %
|
||
|
(pe_file.replace(self.loc+"/",""), \
|
||
|
shorttime(start_ts), shorttime(end_ts)))
|
||
|
+ # limit the log scope temporarily
|
||
|
self.logobj.set_log_timeframe(start_ts, end_ts)
|
||
|
self.events()
|
||
|
self.logobj.set_log_timeframe(self.from_dt, self.to_dt)
|