#!/usr/bin/env python
#
# Debugify summary for the original debug info testing.
#
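#
# Example usage (a minimal sketch; the script and file names below are
# illustrative, assuming the input JSON was produced by LLVM's debug info
# preservation verification, e.g. opt's -verify-each-debuginfo-preserve):
#
#   python llvm-original-di-check.py debug-info-errors.json \
#       --report-html-file report.html
#   python llvm-original-di-check.py debug-info-errors.json --acceptance-test
#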
from __future__ import print_function
import argparse
import os
import re
import sys
from json import loads
from collections import defaultdict
from collections import OrderedDict
class DILocBug:
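    """A DILocation bug: `instr` in function `fn_name` (basic block `bb_name`)
    had its debug location affected by `action`. `origin`, if present, carries
    the stack trace of the code that dropped the location."""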
def __init__(self, origin, action, bb_name, fn_name, instr):
self.origin = origin
self.action = action
self.bb_name = bb_name
self.fn_name = fn_name
self.instr = instr
def key(self):
return self.action + self.bb_name + self.fn_name + self.instr
def reduced_key(self, bug_pass):
if self.origin is not None:
# If we have the origin stacktrace available, we can use it to efficiently deduplicate identical errors. We
# just need to remove the pointer values from the string first, so that we can deduplicate across files.
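            # For example, a hypothetical frame "llvm::Instruction::setDebugLoc() 0x55f0a1b2c3d4"
            # reduces to "llvm::Instruction::setDebugLoc() ", which is identical across runs.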
origin_no_addr = re.sub(r"0x[0-9a-fA-F]+", "", self.origin)
return origin_no_addr
return bug_pass + self.instr
def to_dict(self):
result = {
"instr": self.instr,
"fn_name": self.fn_name,
"bb_name": self.bb_name,
"action": self.action,
}
if self.origin:
result["origin"] = self.origin
return result
class DISPBug:
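    """A DISubprogram bug: the function `fn_name` had its subprogram metadata
    affected by `action`."""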
def __init__(self, action, fn_name):
self.action = action
self.fn_name = fn_name
def key(self):
return self.action + self.fn_name
def reduced_key(self, bug_pass):
return bug_pass + self.fn_name
def to_dict(self):
return {
"fn_name": self.fn_name,
"action": self.action,
}
class DIVarBug:
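    """A debug variable bug: variable `name` in function `fn_name` had its
    debug info affected by `action`."""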
def __init__(self, action, name, fn_name):
self.action = action
self.name = name
self.fn_name = fn_name
def key(self):
return self.action + self.name + self.fn_name
def reduced_key(self, bug_pass):
return bug_pass + self.name
def to_dict(self):
return {
"fn_name": self.fn_name,
"name": self.name,
"action": self.action,
}
def print_bugs_yaml(name, bugs_dict, indent=2):
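    """Print bugs grouped by file and pass as indented, YAML-like text.

    Illustrative output shape (file and pass names are hypothetical):

      DILocation Bugs:
        test.ll:
          SimplifyCFGPass:
          - action: drop
            instr: br
    """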
def get_bug_line(indent_level: int, text: str, margin_mark: bool = False):
if margin_mark:
return "- ".rjust(indent_level * indent) + text
return " " * indent * indent_level + text
print(f"{name}:")
    for bugs_file, bugs_pass_dict in sorted(bugs_dict.items()):
print(get_bug_line(1, f"{bugs_file}:"))
        for bugs_pass, bugs_list in sorted(bugs_pass_dict.items()):
print(get_bug_line(2, f"{bugs_pass}:"))
for bug in bugs_list:
bug_dict = bug.to_dict()
first_line = True
# First item needs a '-' in the margin.
                for key, val in sorted(bug_dict.items()):
if "\n" in val:
# Output block text for any multiline string.
print(get_bug_line(3, f"{key}: |", first_line))
for line in val.splitlines():
print(get_bug_line(4, line))
else:
print(get_bug_line(3, f"{key}: {val}", first_line))
first_line = False
# Report the bugs in the form of HTML.
def generate_html_report(
di_location_bugs,
di_subprogram_bugs,
di_var_bugs,
di_location_bugs_summary,
di_sp_bugs_summary,
di_var_bugs_summary,
html_file,
):
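    """Write an HTML report: one table per bug category (DILocation,
    DISubprogram, variable location), each followed by a per-pass summary."""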
fileout = open(html_file, "w")
html_header = """
"""
# Create the table for Location bugs.
table_title_di_loc = "Location Bugs found by the Debugify"
table_di_loc = """
{}
""".format(
table_title_di_loc
)
# If any DILocation bug has an origin stack trace, we emit an extra column in the table, which we must therefore
# determine up-front.
has_origin_col = any(
x.origin is not None
for per_file_bugs in di_location_bugs.values()
for per_pass_bugs in per_file_bugs.values()
for x in per_pass_bugs
)
header_di_loc = [
"File",
"LLVM Pass Name",
"LLVM IR Instruction",
"Function Name",
"Basic Block Name",
"Action",
]
if has_origin_col:
header_di_loc.append("Origin")
for column in header_di_loc:
table_di_loc += " {0} | \n".format(column.strip())
table_di_loc += "
\n"
at_least_one_bug_found = False
    # Handle location bugs.
for file, per_file_bugs in di_location_bugs.items():
for llvm_pass, per_pass_bugs in per_file_bugs.items():
# No location bugs for the pass.
if len(per_pass_bugs) == 0:
continue
at_least_one_bug_found = True
row = []
table_di_loc += " \n"
# Get the bugs info.
for x in per_pass_bugs:
row.append(" \n")
row.append(file)
row.append(llvm_pass)
row.append(x.instr)
row.append(x.fn_name)
row.append(x.bb_name)
row.append(x.action)
if has_origin_col:
if x.origin is not None:
row.append(
f"View Origin StackTrace
{x.origin}
"
)
else:
row.append("")
row.append("
\n")
# Dump the bugs info into the table.
for column in row:
# The same file-pass pair can have multiple bugs.
                if column == "    <tr>\n" or column == "    </tr>\n":
table_di_loc += column
continue
table_di_loc += " {0} | \n".format(column.strip())
table_di_loc += " \n"
if not at_least_one_bug_found:
table_di_loc += """
No bugs found |
"""
table_di_loc += "
\n"
# Create the summary table for the loc bugs.
table_title_di_loc_sum = "Summary of Location Bugs"
table_di_loc_sum = """
{}
""".format(
table_title_di_loc_sum
)
header_di_loc_sum = ["LLVM Pass Name", "Number of bugs"]
for column in header_di_loc_sum:
table_di_loc_sum += " {0} | \n".format(column.strip())
table_di_loc_sum += "
\n"
# Print the summary.
row = []
for llvm_pass, num in sorted(di_location_bugs_summary.items()):
row.append(" \n")
row.append(llvm_pass)
row.append(str(num))
row.append("
\n")
for column in row:
        if column == "    <tr>\n" or column == "    </tr>\n":
table_di_loc_sum += column
continue
table_di_loc_sum += " {0} | \n".format(column.strip())
table_di_loc_sum += " \n"
if not at_least_one_bug_found:
table_di_loc_sum += """
No bugs found |
"""
table_di_loc_sum += "
\n"
# Create the table for SP bugs.
table_title_di_sp = "SP Bugs found by the Debugify"
table_di_sp = """
{}
""".format(
table_title_di_sp
)
header_di_sp = ["File", "LLVM Pass Name", "Function Name", "Action"]
for column in header_di_sp:
table_di_sp += " {0} | \n".format(column.strip())
table_di_sp += "
\n"
at_least_one_bug_found = False
# Handle fn bugs.
for file, per_file_bugs in di_subprogram_bugs.items():
for llvm_pass, per_pass_bugs in per_file_bugs.items():
# No SP bugs for the pass.
if len(per_pass_bugs) == 0:
continue
at_least_one_bug_found = True
row = []
table_di_sp += " \n"
# Get the bugs info.
for x in per_pass_bugs:
row.append(" \n")
row.append(file)
row.append(llvm_pass)
row.append(x.fn_name)
row.append(x.action)
row.append("
\n")
# Dump the bugs info into the table.
for column in row:
# The same file-pass pair can have multiple bugs.
                if column == "    <tr>\n" or column == "    </tr>\n":
table_di_sp += column
continue
table_di_sp += " {0} | \n".format(column.strip())
table_di_sp += " \n"
if not at_least_one_bug_found:
table_di_sp += """
No bugs found |
"""
table_di_sp += "
\n"
# Create the summary table for the sp bugs.
table_title_di_sp_sum = "Summary of SP Bugs"
table_di_sp_sum = """
{}
""".format(
table_title_di_sp_sum
)
header_di_sp_sum = ["LLVM Pass Name", "Number of bugs"]
for column in header_di_sp_sum:
table_di_sp_sum += " {0} | \n".format(column.strip())
table_di_sp_sum += "
\n"
# Print the summary.
row = []
for llvm_pass, num in sorted(di_sp_bugs_summary.items()):
row.append(" \n")
row.append(llvm_pass)
row.append(str(num))
row.append("
\n")
for column in row:
        if column == "    <tr>\n" or column == "    </tr>\n":
table_di_sp_sum += column
continue
table_di_sp_sum += " {0} | \n".format(column.strip())
table_di_sp_sum += " \n"
if not at_least_one_bug_found:
table_di_sp_sum += """
No bugs found |
"""
table_di_sp_sum += "
\n"
# Create the table for Variable bugs.
table_title_di_var = "Variable Location Bugs found by the Debugify"
table_di_var = """
{}
""".format(
table_title_di_var
)
header_di_var = ["File", "LLVM Pass Name", "Variable", "Function", "Action"]
for column in header_di_var:
table_di_var += " {0} | \n".format(column.strip())
table_di_var += "
\n"
at_least_one_bug_found = False
# Handle var bugs.
for file, per_file_bugs in di_var_bugs.items():
for llvm_pass, per_pass_bugs in per_file_bugs.items():
            # No variable location bugs for the pass.
if len(per_pass_bugs) == 0:
continue
at_least_one_bug_found = True
row = []
table_di_var += " \n"
# Get the bugs info.
for x in per_pass_bugs:
row.append(" \n")
row.append(file)
row.append(llvm_pass)
row.append(x.name)
row.append(x.fn_name)
row.append(x.action)
row.append("
\n")
# Dump the bugs info into the table.
for column in row:
# The same file-pass pair can have multiple bugs.
                if column == "    <tr>\n" or column == "    </tr>\n":
table_di_var += column
continue
table_di_var += " {0} | \n".format(column.strip())
table_di_var += " \n"
if not at_least_one_bug_found:
table_di_var += """
No bugs found |
"""
table_di_var += "
\n"
    # Create the summary table for the variable location bugs.
    table_title_di_var_sum = "Summary of Variable Location Bugs"
    table_di_var_sum = """<table>
    <caption><b>{}</b></caption>
    <tr>
    """.format(
        table_title_di_var_sum
    )
header_di_var_sum = ["LLVM Pass Name", "Number of bugs"]
for column in header_di_var_sum:
table_di_var_sum += " {0} | \n".format(column.strip())
table_di_var_sum += "
\n"
# Print the summary.
row = []
for llvm_pass, num in sorted(di_var_bugs_summary.items()):
row.append(" \n")
row.append(llvm_pass)
row.append(str(num))
row.append("
\n")
for column in row:
        if column == "    <tr>\n" or column == "    </tr>\n":
table_di_var_sum += column
continue
table_di_var_sum += " {0} | \n".format(column.strip())
table_di_var_sum += " \n"
if not at_least_one_bug_found:
table_di_var_sum += """
No bugs found |
"""
table_di_var_sum += "
\n"
# Finish the html page.
html_footer = """
"""
new_line = "
\n"
    fileout.write(html_header)
    fileout.write(table_di_loc)
    fileout.write(new_line)
    fileout.write(table_di_loc_sum)
    fileout.write(new_line)
    fileout.write(new_line)
    fileout.write(table_di_sp)
    fileout.write(new_line)
    fileout.write(table_di_sp_sum)
    fileout.write(new_line)
    fileout.write(new_line)
    fileout.write(table_di_var)
    fileout.write(new_line)
    fileout.write(table_di_var_sum)
    fileout.write(html_footer)
    fileout.close()
    print(f"The {html_file} report has been generated.")
# Read the JSON file in chunks.
def get_json_chunk(file, start, size):
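    """Parse up to `size` JSON lines from `file`, starting at 1-based line
    `start`. Returns (parsed_objects, unparsable_line_count, last_line_read)."""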
json_parsed = None
di_checker_data = []
skipped_lines = 0
line = 0
    # The file contains one JSON object per line.
# An example of the line (formatted json):
# {
# "file": "simple.c",
# "pass": "Deduce function attributes in RPO",
# "bugs": [
# [
# {
# "action": "drop",
# "metadata": "DISubprogram",
# "name": "fn2"
# },
# {
# "action": "drop",
# "metadata": "DISubprogram",
# "name": "fn1"
# }
# ]
# ]
# }
with open(file) as json_objects_file:
for json_object_line in json_objects_file:
line += 1
if line < start:
continue
if line >= start + size:
break
try:
json_object = loads(json_object_line)
            except ValueError:
skipped_lines += 1
else:
di_checker_data.append(json_object)
return (di_checker_data, skipped_lines, line)
# Parse the program arguments.
def parse_program_args(parser):
parser.add_argument("file_name", type=str, help="json file to process")
parser.add_argument(
"--reduce",
action="store_true",
help="create reduced report by deduplicating bugs within and across files",
)
report_type_group = parser.add_mutually_exclusive_group(required=True)
report_type_group.add_argument(
"--report-html-file", type=str, help="output HTML file for the generated report"
)
report_type_group.add_argument(
"--acceptance-test",
action="store_true",
help="if set, produce terminal-friendly output and return 0 iff the input file is empty or does not exist",
)
return parser.parse_args()
def Main():
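    """Entry point: aggregate bugs per (file, pass) pair from the JSON input,
    then emit an HTML report or terminal/YAML-style output."""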
parser = argparse.ArgumentParser()
opts = parse_program_args(parser)
if opts.report_html_file is not None and not opts.report_html_file.endswith(
".html"
):
print("error: The output file must be '.html'.")
sys.exit(1)
if opts.acceptance_test:
if os.path.isdir(opts.file_name):
print(f"error: Directory passed as input file: '{opts.file_name}'")
sys.exit(1)
if not os.path.exists(opts.file_name):
            # Treat a missing input file as success: debugify only generates an
            # output file when errors are found, so no file means no errors.
print(f"No errors detected for: {opts.file_name}")
sys.exit(0)
    # Use defaultdict to build the multi-dimensional bug maps (file -> pass -> bugs).
di_location_bugs = defaultdict(lambda: defaultdict(list))
di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
di_variable_bugs = defaultdict(lambda: defaultdict(list))
    # Use OrderedDict so the summaries keep insertion order.
di_location_bugs_summary = OrderedDict()
di_sp_bugs_summary = OrderedDict()
di_var_bugs_summary = OrderedDict()
# If we are using --reduce, use these sets to deduplicate similar bugs within and across files.
di_loc_reduced_set = set()
di_sp_reduced_set = set()
di_var_reduced_set = set()
start_line = 0
chunk_size = 1000000
end_line = chunk_size - 1
skipped_lines = 0
skipped_bugs = 0
    # Process the input in chunks of one million JSON lines to bound memory usage.
while True:
if start_line > end_line:
break
(debug_info_bugs, skipped, end_line) = get_json_chunk(
opts.file_name, start_line, chunk_size
)
start_line += chunk_size
skipped_lines += skipped
# Map the bugs into the file-pass pairs.
for bugs_per_pass in debug_info_bugs:
try:
bugs_file = bugs_per_pass["file"]
bugs_pass = bugs_per_pass["pass"]
bugs = bugs_per_pass["bugs"][0]
            except (KeyError, IndexError):
skipped_lines += 1
continue
            di_loc_bugs = di_location_bugs.get(bugs_file, {}).get(bugs_pass, [])
            di_sp_bugs = di_subprogram_bugs.get(bugs_file, {}).get(bugs_pass, [])
            di_var_bugs = di_variable_bugs.get(bugs_file, {}).get(bugs_pass, [])
# Omit duplicated bugs.
di_loc_set = set()
di_sp_set = set()
di_var_set = set()
for bug in bugs:
try:
bugs_metadata = bug["metadata"]
                except KeyError:
skipped_bugs += 1
continue
if bugs_metadata == "DILocation":
try:
origin = bug.get("origin")
action = bug["action"]
bb_name = bug["bb-name"]
fn_name = bug["fn-name"]
instr = bug["instr"]
                    except KeyError:
skipped_bugs += 1
continue
di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr)
                    if di_loc_bug.key() not in di_loc_set:
di_loc_set.add(di_loc_bug.key())
if opts.reduce:
reduced_key = di_loc_bug.reduced_key(bugs_pass)
                            if reduced_key not in di_loc_reduced_set:
di_loc_reduced_set.add(reduced_key)
di_loc_bugs.append(di_loc_bug)
else:
di_loc_bugs.append(di_loc_bug)
# Fill the summary dict.
if bugs_pass in di_location_bugs_summary:
di_location_bugs_summary[bugs_pass] += 1
else:
di_location_bugs_summary[bugs_pass] = 1
elif bugs_metadata == "DISubprogram":
try:
action = bug["action"]
name = bug["name"]
                    except KeyError:
skipped_bugs += 1
continue
di_sp_bug = DISPBug(action, name)
                    if di_sp_bug.key() not in di_sp_set:
di_sp_set.add(di_sp_bug.key())
if opts.reduce:
reduced_key = di_sp_bug.reduced_key(bugs_pass)
                            if reduced_key not in di_sp_reduced_set:
di_sp_reduced_set.add(reduced_key)
di_sp_bugs.append(di_sp_bug)
else:
di_sp_bugs.append(di_sp_bug)
# Fill the summary dict.
if bugs_pass in di_sp_bugs_summary:
di_sp_bugs_summary[bugs_pass] += 1
else:
di_sp_bugs_summary[bugs_pass] = 1
elif bugs_metadata == "dbg-var-intrinsic":
try:
action = bug["action"]
fn_name = bug["fn-name"]
name = bug["name"]
                    except KeyError:
skipped_bugs += 1
continue
di_var_bug = DIVarBug(action, name, fn_name)
                    if di_var_bug.key() not in di_var_set:
di_var_set.add(di_var_bug.key())
if opts.reduce:
reduced_key = di_var_bug.reduced_key(bugs_pass)
                            if reduced_key not in di_var_reduced_set:
di_var_reduced_set.add(reduced_key)
di_var_bugs.append(di_var_bug)
else:
di_var_bugs.append(di_var_bug)
# Fill the summary dict.
if bugs_pass in di_var_bugs_summary:
di_var_bugs_summary[bugs_pass] += 1
else:
di_var_bugs_summary[bugs_pass] = 1
else:
# Unsupported metadata.
skipped_bugs += 1
continue
if di_loc_bugs:
di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
if di_sp_bugs:
di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
if di_var_bugs:
di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
if opts.report_html_file is not None:
generate_html_report(
di_location_bugs,
di_subprogram_bugs,
di_variable_bugs,
di_location_bugs_summary,
di_sp_bugs_summary,
di_var_bugs_summary,
opts.report_html_file,
)
else:
# Pretty(ish) print the detected bugs, but check if any exist first so that we don't print an empty dict.
if di_location_bugs:
print_bugs_yaml("DILocation Bugs", di_location_bugs)
if di_subprogram_bugs:
print_bugs_yaml("DISubprogram Bugs", di_subprogram_bugs)
if di_variable_bugs:
print_bugs_yaml("DIVariable Bugs", di_variable_bugs)
if opts.acceptance_test:
if any((di_location_bugs, di_subprogram_bugs, di_variable_bugs)):
# Add a newline gap after printing at least one error.
print()
print(f"Errors detected for: {opts.file_name}")
sys.exit(1)
else:
print(f"No errors detected for: {opts.file_name}")
if skipped_lines > 0:
print("Skipped lines: " + str(skipped_lines))
if skipped_bugs > 0:
print("Skipped bugs: " + str(skipped_bugs))
if __name__ == "__main__":
Main()
sys.exit(0)