[BOLT][DOC] Add script for automatic user guide generation (#93822)

Elvina Yakubova 2024-05-31 13:50:51 +01:00 committed by GitHub
parent 37ecd43335
commit 765ce86991
GPG Key ID: B5690EEEBB952194
2 changed files with 239 additions and 139 deletions

bolt/docs/CommandLineArgumentReference.md

@@ -6,41 +6,37 @@
 ## OPTIONS
-### Generic options
+### Generic options:
 - `-h`
-Alias for `--help`
+Alias for --help
 - `--help`
-Display available options (`--help-hidden` for more).
+Display available options (--help-hidden for more)
 - `--help-hidden`
-Display all available options.
+Display all available options
 - `--help-list`
-Display list of available options (`--help-list-hidden` for more).
+Display list of available options (--help-list-hidden for more)
 - `--help-list-hidden`
-Display list of all available options.
-- `--print-all-options`
-Print all option values after command line parsing.
-- `--print-options`
-Print non-default options after command line parsing.
+Display list of all available options
 - `--version`
-Display the version of this program.
+Display the version of this program
-### Output options
+### Output options:
 - `--bolt-info`
 Write bolt info section in the output binary
 - `-o <string>`
@@ -50,7 +46,7 @@
 Save recorded profile to a file
-### BOLT generic options
+### BOLT generic options:
 - `--align-text=<uint>`
@@ -89,15 +85,20 @@
 - `--data=<string>`
-<data file>
+data file
 - `--data2=<string>`
 data file
 - `--debug-skeleton-cu`
-Prints out offsets for abbrev and debug_info of Skeleton CUs that get patched.
+Prints out offsetrs for abbrev and debu_info of Skeleton CUs that get patched.
 - `--deterministic-debuginfo`
-Disables parallel execution of tasks that may produce nondeterministic debug info
+Disables parallel execution of tasks that may produce nondeterministic debug
+info
 - `--dot-tooltip-code`
@@ -113,7 +114,7 @@
 - `--dump-dot-all`
-Dump function CFGs to graphviz format after each stage; enable '-print-loops'
+Dump function CFGs to graphviz format after each stage;enable '-print-loops'
 for color-coded blocks
 - `--dump-orc`
@@ -179,8 +180,8 @@
 - `--hot-text`
 Generate hot text symbols. Apply this option to a precompiled binary that
-manually calls into hugify, such that at runtime hugify call will put hot
-code into 2M pages. This requires relocation.
+manually calls into hugify, such that at runtime hugify call will put hot code
+into 2M pages. This requires relocation.
 - `--hot-text-move-sections=<sec1,sec2,sec3,...>`
@@ -227,15 +228,15 @@
 - `--profile-format=<value>`
 Format to dump profile output in aggregation mode, default is fdata
-- `=fdata`: offset-based plaintext format
-- `=yaml`: dense YAML representation
+- `fdata`: offset-based plaintext format
+- `yaml`: dense YAML representation
 - `--r11-availability=<value>`
 Determine the availability of r11 before indirect branches
-- `=never`: r11 not available
-- `=always`: r11 available before calls and jumps
-- `=abi`r11 available before calls but not before jumps
+- `never`: r11 not available
+- `always`: r11 available before calls and jumps
+- `abi`: r11 available before calls but not before jumps
 - `--relocs`
@@ -283,7 +284,8 @@
 - `--trap-avx512`
-In relocation mode trap upon entry to any function that uses AVX-512 instructions
+In relocation mode trap upon entry to any function that uses AVX-512
+instructions
 - `--trap-old-code`
@@ -311,7 +313,7 @@
 Output a single dwarf package file (dwp) instead of multiple non-relocatable
 dwarf object files (dwo).
-### BOLT optimization options
+### BOLT optimization options:
 - `--align-blocks`
@@ -357,13 +359,14 @@
 - `--cg-use-split-hot-size`
-Use hot/cold data on basic blocks to determine hot sizes for call graph functions
+Use hot/cold data on basic blocks to determine hot sizes for call graph
+functions
 - `--cold-threshold=<uint>`
 Tenths of percents of main entry frequency to use as a threshold when
-evaluating whether a basic block is cold (0 means it is only considered
-cold if the block has zero samples). Default: 0
+evaluating whether a basic block is cold (0 means it is only considered cold
+if the block has zero samples). Default: 0
 - `--elim-link-veneers`
@@ -375,8 +378,8 @@
 - `--equalize-bb-counts`
-Use same count for BBs that should have equivalent count (used in non-LBR
-and shrink wrapping)
+Use same count for BBs that should have equivalent count (used in non-LBR and
+shrink wrapping)
 - `--execution-count-threshold=<uint>`
@@ -438,8 +441,8 @@
 - `--icp-calls-remaining-percent-threshold=<uint>`
-The percentage threshold against remaining unpromoted indirect call count
-for the promotion for calls
+The percentage threshold against remaining unpromoted indirect call count for
+the promotion for calls
 - `--icp-calls-topn`
@@ -518,22 +521,18 @@
 - `--indirect-call-promotion-jump-tables-topn=<uint>`
-Limit number of targets to consider when doing indirect call promotion on
-jump tables. 0 = no limit
-- `--indirect-call-promotion-mispredict-threshold=<uint>`
-Misprediction threshold for skipping ICP on an indirect call
+Limit number of targets to consider when doing indirect call promotion on jump
+tables. 0 = no limit
 - `--indirect-call-promotion-topn=<uint>`
-Limit number of targets to consider when doing indirect call promotion.
-0 = no limit
+Limit number of targets to consider when doing indirect call promotion. 0 = no
+limit
 - `--indirect-call-promotion-use-mispredicts`
 Use misprediction frequency for determining whether or not ICP should be
-applied at a callsite. The `-indirect-call-promotion-mispredict-threshold`
+applied at a callsite. The -indirect-call-promotion-mispredict-threshold
 value will be used by this heuristic
 - `--infer-fall-throughs`
@@ -566,11 +565,13 @@
 - `--inline-small-functions`
-Inline functions if increase in size is less than defined by `-inline-small-functions-bytes`
+Inline functions if increase in size is less than defined by -inline-small-
+functions-bytes
 - `--inline-small-functions-bytes=<uint>`
-Max number of bytes for the function to be considered small for inlining purposes
+Max number of bytes for the function to be considered small for inlining
+purposes
 - `--instrument`
@@ -590,7 +591,7 @@
 Make jump tables size smaller at the cost of using more instructions at jump
 sites
-- `-jump-tables=<value>`
+- `--jump-tables=<value>`
 Jump tables support (default=basic)
 - `none`: do not optimize functions with jump tables
@@ -780,23 +781,22 @@
 - `--split-strategy=<value>`
 Strategy used to partition blocks into fragments
-- `profile2`: split each function into a hot and cold fragment using
-profiling information
+- `profile2`: split each function into a hot and cold fragment using profiling
+information
 - `cdsplit`: split each function into a hot, warm, and cold fragment using
 profiling information
 - `random2`: split each function into a hot and cold fragment at a randomly
 chosen split point (ignoring any available profiling information)
-- `randomN`: split each function into N fragments at randomly chosen split
+- `randomN`: split each function into N fragments at a randomly chosen split
 points (ignoring any available profiling information)
-- `all`: split all basic blocks of each function into fragments such that
-each fragment contains exactly a single basic block
+- `all`: split all basic blocks of each function into fragments such that each
+fragment contains exactly a single basic block
 - `--split-threshold=<uint>`
 Split function only if its main size is reduced by more than given amount of
-bytes. Default value: 0, i.e. split iff the size is reduced. Note that on
-some architectures the size can increase after splitting.
+bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some
+architectures the size can increase after splitting.
 - `--stale-matching-max-func-size=<uint>`
@@ -817,19 +817,20 @@
 - `--tail-duplication=<value>`
 Duplicate unconditional branches that cross a cache line
-- `none` do not apply
-- `aggressive` aggressive strategy
-- `moderate` moderate strategy
-- `cache` cache-aware duplication strategy
+- `none`: do not apply
+- `aggressive`: aggressive strategy
+- `moderate`: moderate strategy
+- `cache`: cache-aware duplication strategy
 - `--tsp-threshold=<uint>`
-Maximum number of hot basic blocks in a function for which to use a precise TSP solution while re-ordering basic blocks
+Maximum number of hot basic blocks in a function for which to use a precise
+TSP solution while re-ordering basic blocks
 - `--use-aggr-reg-reassign`
-Use register liveness analysis to try to find more opportunities for -reg-reassign optimization
+Use register liveness analysis to try to find more opportunities for -reg-
+reassign optimization
 - `--use-compact-aligner`
@@ -847,21 +848,16 @@
 Only apply branch boundary alignment in hot code
 - `--x86-strip-redundant-address-size`
 Remove redundant Address-Size override prefix
-### BOLT options in relocation mode
+### BOLT options in relocation mode:
-- `-align-macro-fusion=<value>`
+- `--align-macro-fusion=<value>`
 Fix instruction alignment for macro-fusion (x86 relocation mode)
 - `none`: do not insert alignment no-ops for macro-fusion
 - `hot`: only insert alignment no-ops on hot execution paths (default)
 - `all`: always align instructions to allow macro-fusion
-### BOLT instrumentation options
+### BOLT instrumentation options:
 `llvm-bolt <executable> -instrument [-o outputfile] <instrumented-executable>`
@@ -893,72 +889,21 @@
 - `--instrumentation-no-counters-clear`
-Don't clear counters across dumps (use with `instrumentation-sleep-time` option)
+Don't clear counters across dumps (use with instrumentation-sleep-time option)
 - `--instrumentation-sleep-time=<uint>`
 Interval between profile writes (default: 0 = write only at program end).
 This is useful for service workloads when you want to dump profile every X
-minutes or if you are killing the program and the profile is not being
-dumped at the end.
+minutes or if you are killing the program and the profile is not being dumped
+at the end.
 - `--instrumentation-wait-forks`
 Wait until all forks of instrumented process will finish (use with
-`instrumentation-sleep-time` option)
+instrumentation-sleep-time option)
-### Data aggregation options (perf2bolt)
-`perf2bolt -p perf.data [-o outputfile] perf.fdata <executable>`
-- `--autofdo`
-Generate autofdo textual data instead of bolt data
-- `--filter-mem-profile`
-If processing a memory profile, filter out stack or heap accesses that won't
-be useful for BOLT to reduce profile file size
-- `--ignore-build-id`
-Continue even if build-ids in input binary and perf.data mismatch
-- `--ignore-interrupt-lbr`
-Ignore kernel interrupt LBR that happens asynchronously
-- `--itrace=<string>`
-Generate LBR info with perf itrace argument
-- `--nl`
-Aggregate basic samples (without LBR info)
-- `--pa`
-Skip perf and read data from a pre-aggregated file format
-- `--perfdata=<string>`
-Data file
-- `--pid=<ulong>`
-Only use samples from process with specified PID
-- `--time-aggr`
-Time BOLT aggregator
-- `--use-event-pc`
-Use event PC in combination with LBR sampling
-### BOLT printing options
-#### Generic options
+### BOLT printing options:
 - `--print-aliases`
@@ -1032,10 +977,10 @@
 - `--print-pseudo-probes=<value>`
 Print pseudo probe info
-- `=decode`: decode probes section from binary
-- `=address_conversion`: update address2ProbesMap with output block address
-- `=encoded_probes`: display the encoded probes in binary section
-- `=all`: enable all debugging printout
+- `decode`: decode probes section from binary
+- `address_conversion`: update address2ProbesMap with output block address
+- `encoded_probes`: display the encoded probes in binary section
+- `all`: enable all debugging printout
 - `--print-relocations`
@@ -1061,11 +1006,13 @@
 Print names of functions with unknown control flow
-- `--time-opts`
-Print time spent in each optimization
-#### Optimization options
+- `--time-build`
+Print time spent constructing binary functions
+- `--time-rewrite`
+Print time spent in rewriting passes
 - `--print-after-branch-fixup`
@@ -1204,10 +1151,14 @@
 Print functions after veneer elimination pass
-- `--time-build`
-Print time spent constructing binary functions
-- `--time-rewrite`
-Print time spent in rewriting passes
+- `--time-opts`
+Print time spent in each optimization
+- `--print-all-options`
+Print all option values after command line parsing
+- `--print-options`
+Print non-default options after command line parsing
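Most of the churn in the descriptions above is pure re-wrapping: the generator wraps each long description to 80 columns with Python's `textwrap`, joining continuation lines under a two-space indent. A minimal sketch of that behavior, mirroring `wrap_text` in `generate_doc.py` below (the sample string is the `--tsp-threshold` description from this diff):

```python
from textwrap import wrap

LINE_LIMIT = 80

def wrap_text(text, indent, limit=LINE_LIMIT):
    # Wrap to the limit minus the indent width; continuation lines get
    # the indent prepended when joined (as in generate_doc.py).
    wrapped_lines = wrap(text, width=limit - len(indent))
    return ("\n" + indent).join(wrapped_lines)

desc = (
    "Maximum number of hot basic blocks in a function for which to use "
    "a precise TSP solution while re-ordering basic blocks"
)
print(wrap_text(desc, "  "))
# Maximum number of hot basic blocks in a function for which to use a precise
#   TSP solution while re-ordering basic blocks
```

The break after "precise" matches the re-wrapped `--tsp-threshold` lines in the hunk above, which is why so many previously hand-wrapped descriptions moved their line breaks.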

bolt/docs/generate_doc.py (new file, 149 lines)

@@ -0,0 +1,149 @@
#!/usr/bin/env python3

# A tool to parse the output of `llvm-bolt --help-hidden` and update the
# documentation in CommandLineArgumentReference.md automatically.
# Run from the directory in which this file is located to update the docs.

import subprocess
from textwrap import wrap

LINE_LIMIT = 80


def wrap_text(text, indent, limit=LINE_LIMIT):
    wrapped_lines = wrap(text, width=limit - len(indent))
    wrapped_text = ("\n" + indent).join(wrapped_lines)
    return wrapped_text


def add_info(sections, section, option, description):
    indent = "  "
    wrapped_description = "\n".join(
        [
            wrap_text(line, indent) if len(line) > LINE_LIMIT else line
            for line in description
        ]
    )
    sections[section].append((option, indent + wrapped_description))


def parse_bolt_options(output):
    section_headers = [
        "Generic options:",
        "Output options:",
        "BOLT generic options:",
        "BOLT optimization options:",
        "BOLT options in relocation mode:",
        "BOLT instrumentation options:",
        "BOLT printing options:",
    ]

    sections = {key: [] for key in section_headers}
    current_section, prev_section = None, None
    option, description = None, []

    for line in output.split("\n"):
        cleaned_line = line.strip()

        if cleaned_line.casefold() in map(str.casefold, section_headers):
            if prev_section != None:  # Save last option from prev section
                add_info(sections, current_section, option, description)
                option, description = None, []

            cleaned_line = cleaned_line.split()
            # Apply lowercase to all words except the first one
            cleaned_line = [cleaned_line[0]] + [
                word.lower() for word in cleaned_line[1:]
            ]
            # Join the words back together into a string
            cleaned_line = " ".join(cleaned_line)

            current_section = cleaned_line
            prev_section = current_section
            continue

        if cleaned_line.startswith("-"):
            if option and description:
                # Join description lines, adding an extra newline for
                # sub-options that start with '='
                add_info(sections, current_section, option, description)
                option, description = None, []
            parts = cleaned_line.split(" ", 1)
            if len(parts) > 1:
                option = parts[0].strip()
                descr = parts[1].strip()
                descr = descr[2].upper() + descr[3:]
                description = [descr]
                if option.startswith("--print") or option.startswith("--time"):
                    current_section = "BOLT printing options:"
                elif prev_section != None:
                    current_section = prev_section
            continue

        if cleaned_line.startswith("="):
            parts = cleaned_line.split(maxsplit=1)
            # Split into two parts: sub-option and description
            if len(parts) == 2:
                # Rejoin with a single space
                cleaned_line = parts[0] + " " + parts[1].rstrip()
                description.append(cleaned_line)
        elif cleaned_line:  # Multiline description continuation
            description.append(cleaned_line)

    add_info(sections, current_section, option, description)
    return sections


def generate_markdown(sections):
    markdown_lines = [
        "# BOLT - a post-link optimizer developed to speed up large applications\n",
        "## SYNOPSIS\n",
        "`llvm-bolt <executable> [-o outputfile] <executable>.bolt "
        "[-data=perf.fdata] [options]`\n",
        "## OPTIONS",
    ]

    for section, options in sections.items():
        markdown_lines.append(f"\n### {section}")
        if section == "BOLT instrumentation options:":
            markdown_lines.append(
                f"\n`llvm-bolt <executable> -instrument"
                " [-o outputfile] <instrumented-executable>`"
            )
        for option, desc in options:
            markdown_lines.append(f"\n- `{option}`\n")
            # Split description into lines to handle sub-options
            desc_lines = desc.split("\n")
            for line in desc_lines:
                if line.startswith("="):
                    # Sub-option: correct formatting with bullet
                    sub_option, sub_desc = line[1:].split(" ", 1)
                    markdown_lines.append(f"  - `{sub_option}`: {sub_desc[4:]}")
                else:
                    # Regular line of description
                    if line[2:].startswith("<"):
                        line = line.replace("<", "").replace(">", "")
                    markdown_lines.append(f"{line}")

    return "\n".join(markdown_lines)


def main():
    try:
        help_output = subprocess.run(
            ["llvm-bolt", "--help-hidden"], capture_output=True, text=True, check=True
        ).stdout
    except subprocess.CalledProcessError as e:
        print("Failed to execute llvm-bolt --help:")
        print(e)
        return

    sections = parse_bolt_options(help_output)
    markdown = generate_markdown(sections)
    with open("CommandLineArgumentReference.md", "w") as md_file:
        md_file.write(markdown)


if __name__ == "__main__":
    main()
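The section-header normalization the script applies (keep the first word as-is so "BOLT" survives, lowercase the rest) explains why the renamed headers in the diff read "BOLT optimization options:" rather than the capitalization printed by `--help-hidden`. A standalone sketch of that step; the input string here is a hypothetical help header, not verbatim llvm-bolt output:

```python
def normalize_header(header: str) -> str:
    # Keep the first word unchanged (preserves "BOLT"), lowercase the
    # rest: the same normalization parse_bolt_options applies to headers.
    words = header.split()
    return " ".join([words[0]] + [word.lower() for word in words[1:]])

print(normalize_header("BOLT Optimization Options:"))  # BOLT optimization options:
```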