
The compilation commands printed with `-###` contain various lines that are irrelevant for perf-training. Most of them are already filtered out, but when Clang is configured with `CLANG_CONFIG_FILE_SYSTEM_DIR`, an additional line like the following is emitted and needs to be filtered out as well: `Configuration file: /etc/clang/x86_64-redhat-linux-gnu-clang.cfg`
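For context, a sketch of what the `-###` output can look like on such a system (the version string and paths are illustrative, not taken from a real run); everything except the final quoted cc1 job line must be filtered out before the command can be extracted:

    clang version 17.0.6
    Target: x86_64-redhat-linux-gnu
    Thread model: posix
    InstalledDir: /usr/bin
    Configuration file: /etc/clang/x86_64-redhat-linux-gnu-clang.cfg
     "/usr/bin/clang-17" "-cc1" "-triple" "x86_64-redhat-linux-gnu" ...

The filtering lives in `get_cc1_command_for_args` below, which now also skips lines starting with `Configuration file:`.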
# ===- perf-helper.py - Clang perf-training helper ------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===------------------------------------------------------------------------===#

from __future__ import absolute_import, division, print_function

import sys
import os
import subprocess
import argparse
import time
import bisect
import shlex
import tempfile
import re
import shutil
import random

# Forward only PATH so the profiled commands run in a minimal, reproducible
# environment.
test_env = {"PATH": os.environ["PATH"]}


def findFilesWithExtension(path, extension):
    filenames = []
    for root, dirs, files in os.walk(path):
        for filename in files:
            if filename.endswith(f".{extension}"):
                filenames.append(os.path.join(root, filename))
    return filenames


def clean(args):
    if len(args) < 2:
        print(
            "Usage: %s clean <paths> <extension>\n" % __file__
            + "\tRemoves all files with extension from <path>."
        )
        return 1
    for path in args[0:-1]:
        for filename in findFilesWithExtension(path, args[-1]):
            os.remove(filename)
    return 0


def merge(args):
    if len(args) < 3:
        print(
            "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
            + "\tMerges all profraw files from path into output."
        )
        return 1
    cmd = [args[0], "merge", "-o", args[1]]
    for path in args[2:]:
        cmd.extend(findFilesWithExtension(path, "profraw"))
    subprocess.check_call(cmd)
    return 0


def merge_fdata(args):
    if len(args) != 3:
        print(
            "Usage: %s merge-fdata <merge-fdata> <output> <path>\n" % __file__
            + "\tMerges all fdata files from path into output."
        )
        return 1
    cmd = [args[0], "-o", args[1]]
    cmd.extend(findFilesWithExtension(args[2], "fdata"))
    subprocess.check_call(cmd)
    return 0


def perf(args):
    parser = argparse.ArgumentParser(
        prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
    )
    parser.add_argument(
        "--lbr", action="store_true", help="Use perf with branch stacks"
    )
    parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")

    opts = parser.parse_args(args)
    cmd = opts.cmd[1:]

    perf_args = [
        "perf",
        "record",
        "--event=cycles:u",
        "--freq=max",
        "--output=%d.perf.data" % os.getpid(),
    ]
    if opts.lbr:
        perf_args += ["--branch-filter=any,u"]
    perf_args.extend(cmd)

    start_time = time.time()
    subprocess.check_call(perf_args)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)
    return 0


def perf2bolt(args):
    parser = argparse.ArgumentParser(
        prog="perf-helper perf2bolt",
        description="perf2bolt conversion wrapper for perf.data files",
    )
    parser.add_argument("bolt", help="Path to llvm-bolt")
    parser.add_argument("path", help="Path containing perf.data files")
    parser.add_argument("binary", help="Input binary")
    parser.add_argument("--lbr", action="store_true", help="Use LBR perf2bolt mode")
    opts = parser.parse_args(args)

    p2b_args = [
        opts.bolt,
        opts.binary,
        "--aggregate-only",
        "--profile-format=yaml",
    ]
    if not opts.lbr:
        p2b_args += ["-nl"]
    p2b_args += ["-p"]
    for filename in findFilesWithExtension(opts.path, "perf.data"):
        subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
    return 0


def dtrace(args):
    parser = argparse.ArgumentParser(
        prog="perf-helper dtrace",
        description="dtrace wrapper for order file generation",
    )
    parser.add_argument(
        "--buffer-size",
        metavar="size",
        type=int,
        required=False,
        default=1,
        help="dtrace buffer size in MB (default 1)",
    )
    parser.add_argument(
        "--use-oneshot",
        required=False,
        action="store_true",
        help="Use dtrace's oneshot probes",
    )
    parser.add_argument(
        "--use-ustack",
        required=False,
        action="store_true",
        help="Use dtrace's ustack to print function names",
    )
    parser.add_argument(
        "--cc1",
        required=False,
        action="store_true",
        help="Execute cc1 directly (don't profile the driver)",
    )
    parser.add_argument("cmd", nargs="*", help="")

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace.
    first_cmd = next(arg for arg in args if not arg.startswith("--"))
    last_arg_idx = args.index(first_cmd)

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    if opts.cc1:
        cmd = get_cc1_command_for_args(cmd, test_env)

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = "ustack(1);"
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if os.geteuid() != 0:
        print(
            "Script must be run as root, or you must add the following to your sudoers:\n"
            + "%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace"
        )
        dtrace_args.append("sudo")

    dtrace_args.extend(
        (
            "dtrace",
            "-xevaltime=exec",
            "-xbufsize=%dm" % (opts.buffer_size),
            "-q",
            "-n",
            dtrace_script,
            "-c",
            " ".join(cmd),
        )
    )

    if sys.platform == "darwin":
        dtrace_args.append("-xmangled")

    start_time = time.time()

    with open("%d.dtrace" % os.getpid(), "w") as f:
        f.write("### Command: %s\n" % dtrace_args)
        subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0


def get_cc1_command_for_args(cmd, env):
    # Find the cc1 command used by the compiler. To do this we execute the
    # compiler with '-###' to figure out what it wants to do.
    cmd = cmd + ["-###"]
    cc_output = subprocess.check_output(
        cmd, stderr=subprocess.STDOUT, env=env, universal_newlines=True
    ).strip()
    cc_commands = []
    for ln in cc_output.split("\n"):
        # Filter out known garbage.
        if (
            ln == "Using built-in specs."
            or ln.startswith("Configured with:")
            or ln.startswith("Target:")
            or ln.startswith("Thread model:")
            or ln.startswith("InstalledDir:")
            or ln.startswith("LLVM Profile Note")
            or ln.startswith(" (in-process)")
            or ln.startswith("Configuration file:")
            or " version " in ln
        ):
            continue
        cc_commands.append(ln)

    if len(cc_commands) != 1:
        print("Fatal error: unable to determine cc1 command: %r" % cc_output)
        sys.exit(1)

    cc1_cmd = shlex.split(cc_commands[0])
    if not cc1_cmd:
        print("Fatal error: unable to determine cc1 command: %r" % cc_output)
        sys.exit(1)

    return cc1_cmd


def cc1(args):
    parser = argparse.ArgumentParser(
        prog="perf-helper cc1", description="cc1 wrapper for order file generation"
    )
    parser.add_argument("cmd", nargs="*", help="")

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to the compiler.
    first_cmd = next(arg for arg in args if not arg.startswith("--"))
    last_arg_idx = args.index(first_cmd)

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    # Clear the profile file env, so that we don't generate profdata when
    # capturing the cc1 command. Copy test_env so the global is not mutated.
    cc1_env = test_env.copy()
    cc1_env["LLVM_PROFILE_FILE"] = os.devnull
    cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)

    subprocess.check_call(cc1_cmd)
    return 0


def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, missing_symbols, opts):
    def fix_mangling(symbol):
        if sys.platform == "darwin":
            if symbol[0] != "_" and symbol != "start":
                symbol = "_" + symbol
        return symbol

    def get_symbols_with_prefix(symbol):
        start_index = bisect.bisect_left(all_symbols, symbol)
        for s in all_symbols[start_index:]:
            if not s.startswith(symbol):
                break
            yield s

    # Extract the list of symbols from the given file, which is assumed to be
    # the output of a dtrace run logging either probefunc or ustack(1) and
    # nothing else. The dtrace -xdemangle option needs to be used.
    #
    # This is particular to OS X at the moment, because of the '_' handling.
    with open(path) as f:
        current_timestamp = None
        for ln in f:
            # Drop leading and trailing whitespace.
            ln = ln.strip()
            if not ln.startswith("dtrace-"):
                continue

            # If this is a timestamp specifier, extract it.
            if ln.startswith("dtrace-TS: "):
                _, data = ln.split(": ", 1)
                if not data.isdigit():
                    print(
                        "warning: unrecognized timestamp line %r, ignoring" % ln,
                        file=sys.stderr,
                    )
                    continue
                current_timestamp = int(data)
                continue
            elif ln.startswith("dtrace-Symbol: "):
                _, ln = ln.split(": ", 1)
                if not ln:
                    continue

            # If there is a '`' in the line, assume it is a ustack(1) entry in
            # the form of <modulename>`<modulefunc>, where <modulefunc> is never
            # truncated (but does need the mangling patched).
            if "`" in ln:
                yield (current_timestamp, fix_mangling(ln.split("`", 1)[1]))
                continue

            # Otherwise, assume this is a probefunc printout. DTrace on OS X
            # seems to have a bug where it prints the mangled version of symbols
            # which aren't C++ mangled. We just add a '_' to anything but start
            # which doesn't already have a '_'.
            symbol = fix_mangling(ln)

            # If we don't know all the symbols, or the symbol is one of them,
            # just return it.
            if not all_symbols_set or symbol in all_symbols_set:
                yield (current_timestamp, symbol)
                continue

            # Otherwise, we have a symbol name which isn't present in the
            # binary. We assume it is truncated, and try to extend it.

            # Get all the symbols with this prefix.
            possible_symbols = list(get_symbols_with_prefix(symbol))
            if not possible_symbols:
                continue

            # If we found too many possible symbols, ignore this as a prefix.
            if len(possible_symbols) > 100:
                print(
                    "warning: ignoring symbol %r " % symbol
                    + "(no match and too many possible suffixes)",
                    file=sys.stderr,
                )
                continue

            # Report that we resolved a missing symbol.
            if opts.show_missing_symbols and symbol not in missing_symbols:
                print(
                    "warning: resolved missing symbol %r" % symbol, file=sys.stderr
                )
                missing_symbols.add(symbol)

            # Otherwise, treat all the possible matches as having occurred. This
            # is an over-approximation, but it should be ok in practice.
            for s in possible_symbols:
                yield (current_timestamp, s)


def uniq(items):
    # Yield each item once, in first-seen order.
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            seen.add(item)


def form_by_call_order(symbol_lists):
    # Simple strategy: just return symbols in order of occurrence, even across
    # multiple runs.
    return uniq(s for symbols in symbol_lists for s in symbols)


def form_by_call_order_fair(symbol_lists):
    # More complicated strategy that tries to respect the call order across all
    # of the test cases, instead of giving a huge preference to the first test
    # case.

    # First, uniq all the lists.
    uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]

    # Compute the successors for each list.
    succs = {}
    for symbols in uniq_lists:
        for a, b in zip(symbols[:-1], symbols[1:]):
            succs[a] = items = succs.get(a, [])
            if b not in items:
                items.append(b)

    # Emit all the symbols, but make sure to always emit all successors from any
    # call list whenever we see a symbol.
    #
    # There isn't much science here, but this sometimes works better than the
    # more naive strategy. Then again, sometimes it doesn't, so more research is
    # probably needed.
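    # Illustrative example (not from the source): with runs [b, c] and
    # [a, b, d, c], plain call order yields [b, c, a, d]; this strategy emits
    # each symbol together with all of its recorded successors (b brings along
    # both c and d), yielding [b, c, d, a] instead.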
    return uniq(
        s
        for symbols in symbol_lists
        for node in symbols
        for s in ([node] + succs.get(node, []))
    )


def form_by_frequency(symbol_lists):
    # Form the order file by just putting the most commonly occurring symbols
    # first. This assumes the data files didn't use the oneshot dtrace method.

    counts = {}
    for symbols in symbol_lists:
        for a in symbols:
            counts[a] = counts.get(a, 0) + 1

    by_count = list(counts.items())
    by_count.sort(key=lambda kv: -kv[1])
    return [s for s, n in by_count]


def form_by_random(symbol_lists):
    # Randomize the symbols. random.shuffle requires a concrete list, so
    # materialize the uniq generator first.
    merged_symbols = list(uniq(s for symbols in symbol_lists for s in symbols))
    random.shuffle(merged_symbols)
    return merged_symbols


def form_by_alphabetical(symbol_lists):
    # Alphabetize the symbols.
    merged_symbols = list(set(s for symbols in symbol_lists for s in symbols))
    merged_symbols.sort()
    return merged_symbols


# Map method names ("call_order", "frequency", ...) to the form_by_* functions
# defined above.
methods = dict(
    (name[len("form_by_") :], value)
    for name, value in locals().items()
    if name.startswith("form_by_")
)


def genOrderFile(args):
    parser = argparse.ArgumentParser("%prog [options] <dtrace data file directories>")
    parser.add_argument("input", nargs="+", help="")
    parser.add_argument(
        "--binary",
        metavar="PATH",
        type=str,
        dest="binary_path",
        help="Path to the binary being ordered (for getting all symbols)",
        default=None,
    )
    parser.add_argument(
        "--output",
        dest="output_path",
        help="path to output order file to write",
        default=None,
        required=True,
        metavar="PATH",
    )
    parser.add_argument(
        "--show-missing-symbols",
        dest="show_missing_symbols",
        help="show symbols which are 'fixed up' to a valid name (requires --binary)",
        action="store_true",
        default=None,
    )
    parser.add_argument(
        "--output-unordered-symbols",
        dest="output_unordered_symbols_path",
        help="write a list of the unordered symbols to PATH (requires --binary)",
        default=None,
        metavar="PATH",
    )
    parser.add_argument(
        "--method",
        dest="method",
        help="order file generation method to use",
        choices=list(methods.keys()),
        default="call_order",
    )
    opts = parser.parse_args(args)

    # If the user gave us a binary, get all the symbols in the binary by
    # snarfing 'nm' output.
    if opts.binary_path is not None:
        output = subprocess.check_output(
            ["nm", "-P", opts.binary_path], universal_newlines=True
        )
        lines = output.split("\n")
        all_symbols = [ln.split(" ", 1)[0] for ln in lines if ln.strip()]
        print("found %d symbols in binary" % len(all_symbols))
        all_symbols.sort()
    else:
        all_symbols = []
    all_symbols_set = set(all_symbols)

    # Compute the list of input files.
    input_files = []
    for dirname in opts.input:
        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

    # Load all of the input files.
    print("loading from %d data files" % len(input_files))
    missing_symbols = set()
    timestamped_symbol_lists = [
        list(
            parse_dtrace_symbol_file(
                path, all_symbols, all_symbols_set, missing_symbols, opts
            )
        )
        for path in input_files
    ]

    # Reorder each symbol list.
    symbol_lists = []
    for timestamped_symbols_list in timestamped_symbol_lists:
        timestamped_symbols_list.sort()
        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

    # Execute the desired order file generation method.
    method = methods.get(opts.method)
    result = list(method(symbol_lists))

    # Report to the user on what percentage of symbols are present in the order
    # file.
    num_ordered_symbols = len(result)
    if all_symbols:
        print(
            "note: order file contains %d/%d symbols (%.2f%%)"
            % (
                num_ordered_symbols,
                len(all_symbols),
                100.0 * num_ordered_symbols / len(all_symbols),
            ),
            file=sys.stderr,
        )

    if opts.output_unordered_symbols_path:
        ordered_symbols_set = set(result)
        with open(opts.output_unordered_symbols_path, "w") as f:
            f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))

    # Write the order file.
    with open(opts.output_path, "w") as f:
        f.write("\n".join(result))
        f.write("\n")

    return 0


def filter_bolt_optimized(inputs, instrumented_outputs, readelf):
    new_inputs = []
    new_instrumented_outputs = []
    for input, instrumented_output in zip(inputs, instrumented_outputs):
        output = subprocess.check_output(
            [readelf, "-WS", input], universal_newlines=True
        )

        # This binary has already been bolt-optimized, so skip further processing.
        if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE):
            print(f"Skipping {input}, it has already been BOLT-optimized")
        else:
            new_inputs.append(input)
            new_instrumented_outputs.append(instrumented_output)
    return new_inputs, new_instrumented_outputs


def bolt_optimize(args):
    parser = argparse.ArgumentParser("%prog [options]")
    parser.add_argument("--method", choices=["INSTRUMENT", "PERF", "LBR"])
    parser.add_argument("--input")
    parser.add_argument("--instrumented-output")
    parser.add_argument("--fdata")
    parser.add_argument("--perf-training-binary-dir")
    parser.add_argument("--readelf")
    parser.add_argument("--bolt")
    parser.add_argument("--lit")
    parser.add_argument("--merge-fdata")

    opts = parser.parse_args(args)

    inputs = opts.input.split(";")
    instrumented_outputs = opts.instrumented_output.split(";")
    assert len(inputs) == len(
        instrumented_outputs
    ), "inconsistent --input / --instrumented-output arguments"

    inputs, instrumented_outputs = filter_bolt_optimized(
        inputs, instrumented_outputs, opts.readelf
    )
    if not inputs:
        return 0

    environ = os.environ.copy()
    if opts.method == "INSTRUMENT":
        preloads = []
        for input, instrumented_output in zip(inputs, instrumented_outputs):
            args = [
                opts.bolt,
                input,
                "-o",
                instrumented_output,
                "-instrument",
                "--instrumentation-file-append-pid",
                f"--instrumentation-file={opts.fdata}",
            ]
            print("Running: " + " ".join(args))
            process = subprocess.run(
                args,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            sys.stdout.write(process.stdout)
            process.check_returncode()

            # Shared library must be preloaded to be covered.
            if ".so" in input:
                preloads.append(instrumented_output)

        if preloads:
            print(
                f"Patching execution environment for dynamic libraries: {' '.join(preloads)}"
            )
            environ["LD_PRELOAD"] = os.pathsep.join(preloads)

    args = [
        sys.executable,
        opts.lit,
        "-v",
        os.path.join(opts.perf_training_binary_dir, "bolt-fdata"),
    ]
    print("Running: " + " ".join(args))
    process = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=environ,
    )
    sys.stdout.write(process.stdout)
    process.check_returncode()

    if opts.method in ["PERF", "LBR"]:
        perf2bolt([opts.bolt, opts.perf_training_binary_dir, opts.input])

    merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir])

    for input in inputs:
        shutil.copy(input, f"{input}-prebolt")

        args = [
            opts.bolt,
            f"{input}-prebolt",
            "-o",
            input,
            "-data",
            opts.fdata,
            "-reorder-blocks=ext-tsp",
            "-reorder-functions=cdsort",
            "-split-functions",
            "-split-all-cold",
            "-split-eh",
            "-dyno-stats",
            "-use-gnu-stack",
            "-update-debug-sections",
        ]
        # Only pass -nl (no LBR) when aggregating plain perf samples; appending
        # conditionally avoids passing a bogus empty-string argument to llvm-bolt.
        if opts.method == "PERF":
            args.append("-nl")
        print("Running: " + " ".join(args))
        process = subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        sys.stdout.write(process.stdout)
        process.check_returncode()


commands = {
    "bolt-optimize": bolt_optimize,
    "clean": clean,
    "merge": merge,
    "dtrace": dtrace,
    "cc1": cc1,
    "gen-order-file": genOrderFile,
    "merge-fdata": merge_fdata,
    "perf": perf,
    "perf2bolt": perf2bolt,
}


def main():
    f = commands[sys.argv[1]]
    sys.exit(f(sys.argv[2:]))


if __name__ == "__main__":
    main()
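For reference, every subcommand dispatches through the `commands` table above; illustrative invocations (the paths and tool names are placeholders, not taken from the source):

    python perf-helper.py clean profiles/ profraw
    python perf-helper.py merge llvm-profdata merged.profdata profiles/
    python perf-helper.py gen-order-file --binary clang --output clang.order data/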