Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
		
			
				
	
	
		
			324 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			324 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| """
 | |
| This takes a crashing qtest trace and tries to remove superfluous operations
 | |
| """
 | |
| 
 | |
| import sys
 | |
| import os
 | |
| import subprocess
 | |
| import time
 | |
| import struct
 | |
| 
 | |
# Minimization levels (switched on from the command line)
M1 = False  # try removing IO commands iteratively
M2 = False  # try setting bits in operand of write/out to zero

# QEMU invocation settings; the script entry point fills the three
# None-valued names from the environment before minimization starts.
QEMU_PATH = None
QEMU_ARGS = None
CRASH_TOKEN = None  # substring of QEMU's output that identifies the crash
TIMEOUT = 5         # seconds granted to one QEMU run

# Last letter of a write{b,w,l,q} command -> (operand size in bytes,
# struct format character used to pack the operand).
write_suffix_lookup = {
    "b": (1, "B"),
    "w": (2, "H"),
    "l": (4, "L"),
    "q": (8, "Q"),
}
 | |
| 
 | |
def usage():
    """Show the command-line help and terminate the script.

    The help text (with the script name taken from ``sys.argv[0]``) is
    passed to ``sys.exit``, so this function never returns: it raises
    ``SystemExit`` carrying the text as its exit code.
    """
    sys.exit("""\
Usage:

QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace

By default, will try to use the second-to-last line in the output to identify
whether the crash occurred. Optionally, manually set a string that identifies
the crash by setting CRASH_TOKEN=

Options:

-M1: enable a loop around the remove minimizer, which may help decrease some
     timing dependent instructions. Off by default.
-M2: try setting bits in operand of write/out to zero. Off by default.

""".format((sys.argv[0])))
 | |
| 
 | |
# Notice printed by check_if_trace_crashes() when it derives the crash token
# automatically from QEMU's output.
deduplication_note = (
    "\n"
    "Note: While trimming the input, sometimes the mutated trace triggers a different\n"
    "type crash but indicates the same bug. Under this situation, our minimizer is\n"
    "incapable of recognizing and stopped from removing it. In the future, we may\n"
    "use a more sophisticated crash case deduplication method.\n"
    "\n"
)
 | |
| 
 | |
def check_if_trace_crashes(trace, path):
    """Replay *trace* through QEMU and report whether the crash shows up.

    The lines of *trace* are concatenated and written to *path*.  QEMU is
    then started through the shell as
    ``timeout -s 9 <TIMEOUT>s <QEMU_PATH> <QEMU_ARGS>`` with stderr merged
    into stdout and stdin redirected from *path*.

    * While the global ``CRASH_TOKEN`` is still ``None`` the complete output
      is collected and the first three whitespace-separated words of its
      second-to-last line become the new ``CRASH_TOKEN``.  Returns ``True``
      when a token was derived, ``False`` when the run timed out or the
      output did not yield a usable token.
    * Otherwise the output is scanned line by line: a line containing
      ``"CLOSED"`` yields ``False``, a line containing ``CRASH_TOKEN``
      yields ``True``.  If neither appears a warning is printed and
      ``False`` is returned.

    Args:
        trace: list of qtest command lines (strings).
        path: file the trace is written to before QEMU is started.

    Returns:
        bool: whether this trace is considered to reproduce the crash.
    """
    import shlex  # local import: only used to quote the trace path

    global CRASH_TOKEN

    with open(path, "w") as tracefile:
        tracefile.write("".join(trace))

    # QEMU_ARGS is a free-form argument string, so the shell still performs
    # the word splitting; only the trace path is quoted so that a path with
    # spaces or shell metacharacters cannot break the redirection.
    command = ("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1"
               " < {trace_path}").format(timeout=TIMEOUT,
                                         qemu_path=QEMU_PATH,
                                         qemu_args=QEMU_ARGS,
                                         trace_path=shlex.quote(path))

    # The context manager closes both pipes and waits for the child on every
    # return path.  The child is not killed explicitly: killing the shell
    # could orphan QEMU, whereas the `timeout` wrapper already bounds the
    # child's lifetime.
    with subprocess.Popen(command,
                          shell=True,
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          encoding="utf-8") as rc:
        if CRASH_TOKEN is None:
            try:
                outs, _ = rc.communicate(timeout=TIMEOUT)
            except subprocess.TimeoutExpired:
                print("subprocess.TimeoutExpired")
                return False

            out_lines = outs.splitlines()
            token = ""
            if len(out_lines) >= 2:
                token = " ".join(out_lines[-2].split()[0:3])
            if not token:
                # An empty token would be "contained" in every output line
                # and make every later trace look like a crash.
                print("Could not derive a crash token from the QEMU output; "
                      "set CRASH_TOKEN= manually.")
                return False

            CRASH_TOKEN = token
            print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
            print(deduplication_note)
            return True

        for line in iter(rc.stdout.readline, ""):
            if "CLOSED" in line:
                return False
            if CRASH_TOKEN in line:
                return True

    print("\nWarning:")
    print("  There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
    print("  Usually this indicates a different type of crash.\n")
    return False
 | |
| 
 | |
| 
 | |
| # If previous write commands write the same length of data at the same
 | |
| # interval, we view it as a hint.
 | |
def split_write_hint(newtrace, i):
    """Guess the next (address, length) from the writes preceding line *i*.

    Walking backwards from ``newtrace[i-1]`` and skipping empty-string
    entries, the three nearest lines must all be ``write `` commands; any
    other line in between cancels the hint.  If those writes share one
    length and are separated by one constant address step, the address that
    continues the pattern is returned.

    Returns:
        ``(address, length)`` as ints, or ``None`` when no hint applies.
    """
    HINT_LEN = 3  # > 2
    if i < HINT_LEN:
        return None

    # Collect the previous contiguous write commands, nearest first.
    recent = []
    for pos in range(i - 1, -1, -1):
        if len(recent) == HINT_LEN:
            break
        entry = newtrace[pos]
        if entry.startswith("write "):
            recent.append(entry)
        elif entry != "":
            return None
    if len(recent) != HINT_LEN:
        return None

    def field(command, index):
        # Operands are hexadecimal: "write ADDR LEN DATA".
        return int(command.split()[index], 16)

    # Every write must transfer the same number of bytes ...
    length = field(recent[0], 2)
    if any(field(command, 2) != length for command in recent[1:]):
        return None

    # ... and consecutive writes must be the same distance apart.
    step = field(recent[0], 1) - field(recent[1], 1)
    for newer, older in zip(recent[1:], recent[2:]):
        if field(newer, 1) - field(older, 1) != step:
            return None

    return (field(recent[0], 1) + step, length)
 | |
| 
 | |
| 
 | |
def remove_lines(newtrace, outpath):
    """Shrink *newtrace* in place while it keeps reproducing the crash.

    For every position the following is attempted, re-running QEMU through
    ``check_if_trace_crashes`` after each mutation:

    1. Blank out a run of lines (replace them with ``""``).  Each success
       doubles the run length for the next attempt; a failed multi-line
       attempt is undone and retried with a single line.
    2. Rewrite a ``write{b,w,l,q} ADDR VALUE`` command as the generic
       ``write ADDR LEN DATA`` form, trying little- then big-endian data.
    3. Shrink or split a ``write ADDR LEN DATA`` command, first using the
       hint from ``split_write_hint`` and then a binary split that inserts
       a second write after the current line.

    Args:
        newtrace: list of qtest command lines; modified in place.  Removed
            lines are left as empty strings, splits insert new entries.
        outpath: file each candidate trace is written to.

    Returns:
        None.
    """
    remove_step = 1
    i = 0
    while i < len(newtrace):
        # 1.) Try to remove lines completely and reproduce the crash.
        # If it works, we're done.
        if (i + remove_step) >= len(newtrace):
            remove_step = 1
        prior = newtrace[i:i + remove_step]
        for j in range(i, i + remove_step):
            newtrace[j] = ""
        print("Removing {lines} ...\n".format(lines=prior))
        if check_if_trace_crashes(newtrace, outpath):
            i += remove_step
            # Double the number of lines to remove for next round
            remove_step *= 2
            continue
        # Failed to remove multiple IOs, fast recovery
        if remove_step > 1:
            for j in range(i, i + remove_step):
                newtrace[j] = prior[j - i]
            remove_step = 1
            continue
        newtrace[i] = prior[0]  # remove_step = 1

        # 2.) Try to replace write{bwlq} commands with a write addr, len
        # command. Since this can require swapping endianness, try both LE and
        # BE options. We do this, so we can "trim" the writes in (3)
        if (newtrace[i].startswith("write") and not
                newtrace[i].startswith("write ")):
            fields = newtrace[i].split()
            suffix = fields[0][-1]
            assert(suffix in write_suffix_lookup)
            size, pack_code = write_suffix_lookup[suffix]
            addr = int(fields[1], 16)
            value = int(fields[2], 16)
            for endianness in ['<', '>']:
                data = struct.pack("{end}{size}".format(end=endianness,
                                                        size=pack_code),
                                   value)
                newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                    addr=hex(addr),
                    size=hex(size),
                    data=data.hex())
                if check_if_trace_crashes(newtrace, outpath):
                    break
            else:
                # Neither byte order reproduced the crash: keep the original.
                newtrace[i] = prior[0]

        # 3.) If it is a qtest write command: write addr len data, try to split
        # it into two separate write commands. If splitting the data operand
        # from length/2^n bytes to the left does not work, try to move the pivot
        # to the right side, then add one to n, until length/2^n == 0. The idea
        # is to prune unnecessary bytes from long writes, while accommodating
        # arbitrary MemoryRegion access sizes and alignments.

        # This algorithm will fail under some rare situations.
        # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)
        if newtrace[i].startswith("write "):
            fields = newtrace[i].split()
            addr = int(fields[1], 16)
            length = int(fields[2], 16)
            data = fields[3][2:]  # hex digits without the "0x" prefix
            if length > 1:

                # Can we get a hint from previous writes?
                hint = split_write_hint(newtrace, i)
                if hint is not None:
                    hint_addr, hint_len = hint
                    if (hint_addr >= addr and
                            hint_addr + hint_len <= addr + length):
                        # Two hex digits per byte.
                        first = (hint_addr - addr) * 2
                        newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                            addr=hex(hint_addr),
                            size=hex(hint_len),
                            data=data[first:first + hint_len * 2])
                        if check_if_trace_crashes(newtrace, outpath):
                            # next round
                            i += 1
                            continue
                        newtrace[i] = prior[0]

                # Try splitting it using a binary approach.  Integer floor
                # division keeps the byte counts exact for any length.
                leftlength = length // 2
                rightlength = length - leftlength
                newtrace.insert(i + 1, "")
                power = 1
                split_ok = False
                while leftlength > 0:
                    newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                        addr=hex(addr),
                        size=hex(leftlength),
                        data=data[:leftlength * 2])
                    newtrace[i + 1] = "write {addr} {size} 0x{data}\n".format(
                        addr=hex(addr + leftlength),
                        size=hex(rightlength),
                        data=data[leftlength * 2:])
                    if check_if_trace_crashes(newtrace, outpath):
                        split_ok = True
                        break
                    # move the pivot to right side
                    if leftlength < rightlength:
                        rightlength, leftlength = leftlength, rightlength
                        continue
                    power += 1
                    leftlength = length // (2 ** power)
                    rightlength = length - leftlength

                # The outcome of the last attempt is already known, so the
                # unchanged trace is not replayed a second time.
                if split_ok:
                    # Stay on index i so the left half is processed next.
                    continue
                newtrace[i] = prior[0]
                del newtrace[i + 1]
        i += 1
 | |
| 
 | |
| 
 | |
def clear_bits(newtrace, outpath):
    """Zero as many bits as possible in the operands of write/out commands.

    For each line starting with ``"write "`` or ``"out"`` the last field is
    parsed as a hexadecimal number.  Its set bits are cleared one at a time,
    most significant first; a cleared bit is kept only if
    ``check_if_trace_crashes`` still reports the crash, otherwise the line
    is restored.

    The rewritten operand keeps the hex-digit count of the original (rounded
    up to an even number of digits), so clearing high bits does not shorten
    the operand and the byte count of a ``write`` payload keeps matching its
    SIZE field.

    Args:
        newtrace: list of qtest command lines; modified in place.
        outpath: file each candidate trace is written to.

    Returns:
        None.
    """
    for i, line in enumerate(newtrace):
        if not line.startswith(("write ", "out")):
            continue
        # write ADDR SIZE DATA
        # outx ADDR VALUE
        print("\nzero setting bits: {}".format(line))

        fields = line.split()
        prefix = " ".join(fields[:-1])
        data = fields[-1]
        value = int(data, 16)

        # Keep the operand's width; an existing source comment notes that
        # qtest seems to accept only padded (even-length) hex values.
        digits = data[2:] if data[:2].lower() == "0x" else data
        width = len(digits) + len(digits) % 2

        for bit in range(value.bit_length() - 1, -1, -1):
            mask = 1 << bit
            if not value & mask:
                continue
            candidate = value & ~mask
            prior = newtrace[i]
            newtrace[i] = "{prefix} {data_try}\n".format(
                prefix=prefix,
                data_try="0x{:0{width}x}".format(candidate, width=width))
            if check_if_trace_crashes(newtrace, outpath):
                value = candidate
            else:
                newtrace[i] = prior
 | |
| 
 | |
| 
 | |
def minimize_trace(inpath, outpath):
    """Minimize the crashing qtest trace in *inpath* and store it in *outpath*.

    The input trace is replayed once to confirm the crash; the time that run
    took, multiplied by five, becomes the global ``TIMEOUT`` for all later
    runs.  ``remove_lines`` is then applied (repeatedly while the trace keeps
    shrinking when ``M1`` or ``M2`` is enabled) and, with ``M2``,
    ``clear_bits`` afterwards.

    Each stage ends with a verification run, which also leaves the verified
    trace as the content of *outpath* (``check_if_trace_crashes`` writes the
    trace it is given).  These runs are plain calls rather than ``assert``
    statements so they still happen under ``python -O``.

    Args:
        inpath: path of the crashing input trace.
        outpath: path candidate traces and the final result are written to.

    Raises:
        SystemExit: if the input trace does not crash, or a minimized trace
            stops reproducing the crash.
    """
    global TIMEOUT
    with open(inpath) as f:
        trace = f.readlines()
    start = time.time()
    if not check_if_trace_crashes(trace, outpath):
        sys.exit("The input qtest trace didn't cause a crash...")
    end = time.time()
    print("Crashed in {} seconds".format(end-start))
    TIMEOUT = (end-start)*5
    print("Setting the timeout for {} seconds".format(TIMEOUT))

    newtrace = trace[:]

    # remove lines
    old_len = len(newtrace) + 1
    while old_len > len(newtrace):
        old_len = len(newtrace)
        print("trace length = ", old_len)
        remove_lines(newtrace, outpath)
        if not M1 and not M2:
            break
        # Drop the placeholders left by removed lines before the next pass.
        newtrace = [line for line in newtrace if line != ""]
    if not check_if_trace_crashes(newtrace, outpath):
        sys.exit("The minimized qtest trace no longer causes a crash...")

    # set bits to zero
    if M2:
        clear_bits(newtrace, outpath)
        # Only needed when clear_bits ran: otherwise the trace is unchanged
        # since the verification above.
        if not check_if_trace_crashes(newtrace, outpath):
            sys.exit("The minimized qtest trace no longer causes a crash...")
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # The last two arguments are the input and output trace paths.
    if len(sys.argv) < 3:
        usage()

    # Optional minimization levels.
    M1 = M1 or "-M1" in sys.argv
    M2 = M2 or "-M2" in sys.argv

    # QEMU binary and arguments are mandatory and come from the environment;
    # CRASH_TOKEN is optional.
    QEMU_PATH = os.environ.get("QEMU_PATH")
    QEMU_ARGS = os.environ.get("QEMU_ARGS")
    if None in (QEMU_PATH, QEMU_ARGS):
        usage()
    CRASH_TOKEN = os.environ.get("CRASH_TOKEN")

    # No "-accel" option is appended here; only the qtest/monitor/serial
    # options are added to what the user supplied.
    QEMU_ARGS += " -qtest stdio -monitor none -serial none "

    input_trace, output_trace = sys.argv[-2:]
    minimize_trace(input_trace, output_trace)
 |