
This commit documents the process of specifying values for the analyzer options and checker options implemented in the static analyzer, and adds a script which includes the documentation of the analyzer options (which was previously only available through a command-line flag) in the RST-based web documentation.
294 lines
9.2 KiB
Python
294 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
|
# A tool to automatically generate documentation for the config options of the
|
|
# clang static analyzer by reading `AnalyzerOptions.def`.
|
|
|
|
import argparse
|
|
from collections import namedtuple
|
|
from enum import Enum, auto
|
|
import re
|
|
import sys
|
|
import textwrap
|
|
|
|
|
|
# The following code implements a trivial parser for the narrow subset of C++
|
|
# which is used in AnalyzerOptions.def. This supports the following features:
|
|
# - ignores preprocessor directives, even if they are continued with \ at EOL
|
|
# - ignores comments: both /* ... */ and // ...
|
|
# - parses string literals (even if they contain \" escapes)
|
|
# - concatenates adjacent string literals
|
|
# - parses numbers even if they contain ' as a thousands separator
|
|
# - recognizes MACRO(arg1, arg2, ..., argN) calls
|
|
|
|
|
|
class TT(Enum):
    "Token type enum."
    number = auto()  # integer literal, possibly negative, may contain ' separators
    ident = auto()  # identifier (also matches bare keywords/macro names)
    string = auto()  # string literal, quotes included in the token text
    punct = auto()  # one of ( ) ,  -- the only punctuation this parser needs
|
|
|
|
|
|
# Lexer table: at each position the regexes are tried in this order and the
# first match wins. A kind of None means the matched text (comments,
# preprocessor lines, whitespace) is consumed without producing a token.
TOKENS = [
    (re.compile(r"-?[0-9']+"), TT.number),  # e.g. 42, -1, 1'000
    (re.compile(r"\w+"), TT.ident),
    (re.compile(r'"([^\\"]|\\.)*"'), TT.string),  # tolerates \" escapes
    (re.compile(r"[(),]"), TT.punct),
    (re.compile(r"/\*((?!\*/).)*\*/", re.S), None),  # C-style comment
    (re.compile(r"//.*\n"), None),  # C++ style oneline comment
    (re.compile(r"#.*(\\\n.*)*(?<!\\)\n"), None),  # preprocessor directive
    (re.compile(r"\s+"), None),  # whitespace
]

# One lexed token: `kind` is a TT member, `code` is the matched source text.
Token = namedtuple("Token", "kind code")
|
|
|
|
|
|
class ErrorHandler:
    """Collects errors seen during doc generation and reports them on stderr."""

    def __init__(self):
        # Flipped to True by report_error(); checked at the end of the script.
        self.seen_errors = False

        # This script uses some heuristical tweaks to modify the documentation
        # of some analyzer options. As this code is fragile, we record the use
        # of these tweaks and report them if they become obsolete:
        self.unused_tweaks = [
            "escape star",
            "escape underline",
            "accepted values",
            "example file content",
        ]

    def record_use_of_tweak(self, tweak_name):
        """Mark a tweak as used; calls after the first one are no-ops."""
        if tweak_name in self.unused_tweaks:
            self.unused_tweaks.remove(tweak_name)

    def replace_as_tweak(self, string, pattern, repl, tweak_name):
        """str.replace() that records the tweak when it actually changed text."""
        replaced = string.replace(pattern, repl)
        if replaced != string:
            self.record_use_of_tweak(tweak_name)
        return replaced

    def report_error(self, msg):
        """Print an error to stderr and remember that at least one occurred."""
        print("Error:", msg, file=sys.stderr)
        self.seen_errors = True

    def report_unexpected_char(self, s, pos):
        """Report the character at offset `pos` of `s` with 1-based line/column."""
        lineno = s.count("\n", 0, pos) + 1
        # rfind returns -1 when there is no earlier newline, which makes this
        # expression yield pos + 1, i.e. the correct 1-based column.
        col = pos - s.rfind("\n", 0, pos)
        self.report_error(
            "unexpected character %r in AnalyzerOptions.def at line %d column %d"
            % (s[pos], lineno, col),
        )

    def report_unused_tweaks(self):
        """Complain about tweaks that no longer matched anything in the input."""
        if not self.unused_tweaks:
            return
        suffix = " is" if len(self.unused_tweaks) == 1 else "s are"
        names = ", ".join(self.unused_tweaks)
        self.report_error(f"textual tweak{suffix} unused in script: {names}")
|
|
|
|
|
|
# Module-wide error collector shared by all parsing/rendering helpers below.
err_handler = ErrorHandler()
|
|
|
|
|
|
def tokenize(s):
    """Split the source text `s` into a list of Tokens using TOKENS.

    Characters matched by no table entry are reported via err_handler and
    skipped one at a time, so tokenizing always reaches the end of input.
    """
    tokens = []
    pos = 0
    while pos < len(s):
        m = None
        for regex, kind in TOKENS:
            m = regex.match(s, pos)
            if m:
                # A kind of None marks ignorable text (comments, whitespace,
                # preprocessor directives) that produces no token.
                if kind is not None:
                    tokens.append(Token(kind, m.group(0)))
                pos = m.end()
                break
        if m is None:
            err_handler.report_unexpected_char(s, pos)
            pos += 1
    return tokens
|
|
|
|
|
|
def join_strings(tokens):
    """Merge runs of adjacent string tokens, mimicking C/C++ concatenation.

    When a string token directly follows another string token, the two are
    fused into one token by dropping the closing quote of the earlier
    literal and the opening quote of the later one.
    """
    merged = []
    for tok in tokens:
        if merged and tok.kind == TT.string and merged[-1].kind == TT.string:
            previous = merged[-1]
            merged[-1] = Token(TT.string, previous.code[:-1] + tok.code[1:])
        else:
            merged.append(tok)
    return merged
|
|
|
|
|
|
# One parsed macro invocation: `name` is the macro identifier (str) and
# `args` is the list of argument Tokens (one token per argument).
MacroCall = namedtuple("MacroCall", "name args")
|
|
|
|
|
|
class State(Enum):
    "States of the state machine used for parsing the macro calls."
    init = auto()  # outside of any macro call
    after_ident = auto()  # saw a known macro name, expecting "("
    before_arg = auto()  # expecting the next argument token
    after_arg = auto()  # saw an argument, expecting "," or ")"
|
|
|
|
|
|
def get_calls(tokens, macro_names):
    """Collect MACRO(arg1, ..., argN) calls whose name is in `macro_names`.

    Implemented as a small state machine over `State`. Each argument is
    assumed to be a single token -- which holds for AnalyzerOptions.def
    once adjacent string literals have been merged by join_strings().
    Any token that does not fit the expected shape resets the machine,
    discarding the partially-parsed call.
    """
    state = State.init
    result = []
    current = None  # the MacroCall currently being built, if any
    for tok in tokens:
        if state == State.init and tok.kind == TT.ident and tok.code in macro_names:
            current = MacroCall(tok.code, [])
            state = State.after_ident
        elif state == State.after_ident and tok == Token(TT.punct, "("):
            state = State.before_arg
        elif state == State.before_arg:
            # Defensive None check; before_arg is only entered with a
            # `current` in hand, so this should always hold.
            if current is not None:
                current.args.append(tok)
                state = State.after_arg
        elif state == State.after_arg and tok.kind == TT.punct:
            if tok.code == ")":
                # Call complete: record it and look for the next one.
                result.append(current)
                current = None
                state = State.init
            elif tok.code == ",":
                state = State.before_arg
        else:
            # Unexpected token: abandon the current candidate call.
            current = None
            state = State.init
    return result
|
|
|
|
|
|
# The information will be extracted from calls to these two macros:
|
|
# #define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL)
|
|
# #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC,
|
|
# SHALLOW_VAL, DEEP_VAL)
|
|
|
|
# Maps each macro of interest to the number of parameters it takes; used
# both to filter macro calls and to validate their argument counts.
MACRO_NAMES_PARAMCOUNTS = {
    "ANALYZER_OPTION": 5,
    "ANALYZER_OPTION_DEPENDS_ON_USER_MODE": 6,
}
|
|
|
|
|
|
def string_value(tok):
    """Return the text of a string literal token, unquoted and unescaped.

    Raises ValueError when `tok` is not a string token.
    """
    if tok.kind != TT.string:
        raise ValueError(f"expected a string token, got {tok.kind.name}")
    unquoted = tok.code[1:-1]  # strip the surrounding quotes
    # Resolve backslash escapes, e.g. \" -> "
    return re.sub(r"\\(.)", r"\1", unquoted)
|
|
|
|
|
|
def cmdflag_to_rst_title(cmdflag_tok):
    """Format an option's command-line flag as an RST anchor plus title."""
    flag = string_value(cmdflag_tok)
    anchor = f".. _analyzer-option-{flag}:"
    # The title is the flag itself, underlined with dashes of equal length.
    title = flag + "\n" + "-" * len(flag) + "\n\n"
    return f"{anchor}\n\n{title}"
|
|
|
|
|
|
def desc_to_rst_paragraphs(tok):
    """Convert an option's description string into wrapped RST paragraphs."""
    desc = string_value(tok)

    # Escape some characters that have special meaning in RST:
    desc = err_handler.replace_as_tweak(desc, "*", r"\*", "escape star")
    desc = err_handler.replace_as_tweak(desc, "_", r"\_", "escape underline")

    # Many descriptions end with "Value: <list of accepted values>", which is
    # OK for a terse command line printout, but should be prettified for web
    # documentation. Moreover, the option ctu-invocation-list shows some
    # example file content which is formatted as a preformatted block.
    extra = ""
    # NOTE: the `.` after "file" is a regex wildcard, so any single
    # character is tolerated between "file" and "content".
    value_match = re.search(r"(^|\s)Value:", desc)
    example_match = re.search(r"\s*Example file.content:", desc)
    if value_match:
        err_handler.record_use_of_tweak("accepted values")
        paragraphs = [
            desc[: value_match.start()],
            "Accepted values:" + desc[value_match.end() :],
        ]
    elif example_match:
        err_handler.record_use_of_tweak("example file content")
        paragraphs = [desc[: example_match.start()]]
        extra = "Example file content::\n\n " + desc[example_match.end() :] + "\n\n"
    else:
        paragraphs = [desc]

    wrapped = [textwrap.fill(p, width=80) for p in paragraphs if p.strip()]

    return "\n\n".join(wrapped + [""]) + extra
|
|
|
|
|
|
def default_to_rst(tok):
    """Render a single default-value token as RST text.

    Raises ValueError for token kinds that cannot be default values.
    """
    if tok.kind == TT.string:
        # An empty string literal would render invisibly, so spell it out.
        return "(empty string)" if tok.code == '""' else tok.code
    if tok.kind == TT.ident:
        return tok.code
    if tok.kind == TT.number:
        # Drop C++14 digit separators, e.g. 1'000 -> 1000.
        return tok.code.replace("'", "")
    raise ValueError(f"unexpected token as default value: {tok.kind.name}")
|
|
|
|
|
|
def defaults_to_rst_paragraph(defaults):
    """Render the default value token(s) of an option as an RST paragraph.

    One default comes from ANALYZER_OPTION; two (shallow/deep mode) come
    from ANALYZER_OPTION_DEPENDS_ON_USER_MODE. Raises ValueError otherwise.
    """
    rendered = [default_to_rst(tok) for tok in defaults]

    if len(rendered) == 1:
        return f"Default value: {rendered[0]}\n\n"
    if len(rendered) == 2:
        shallow, deep = rendered
        return (
            f"Default value: {shallow} (in shallow mode) / {deep} (in deep mode)\n\n"
        )
    raise ValueError("unexpected count of default values: %d" % len(defaults))
|
|
|
|
|
|
def macro_call_to_rst_paragraphs(macro_call):
    """Produce the complete RST documentation entry for one macro call.

    On a malformed call, reports the problem through err_handler and
    returns "" so the rest of the output can still be generated.
    """
    try:
        expected = MACRO_NAMES_PARAMCOUNTS[macro_call.name]
        actual = len(macro_call.args)
        if actual != expected:
            raise ValueError(
                f"expected {expected} arguments for {macro_call.name}, found {actual}"
            )

        # The TYPE and NAME arguments are not needed for the documentation.
        _, _, cmdflag, desc, *defaults = macro_call.args

        title = cmdflag_to_rst_title(cmdflag)
        body = desc_to_rst_paragraphs(desc)
        default_paragraph = defaults_to_rst_paragraph(defaults)
        return title + body + default_paragraph
    except ValueError as ve:
        err_handler.report_error(ve.args[0])
        return ""
|
|
|
|
|
|
def get_option_list(input_file):
    """Read AnalyzerOptions.def and return the RST text for all options."""
    with open(input_file, encoding="utf-8") as f:
        contents = f.read()
    tokens = join_strings(tokenize(contents))
    macro_calls = get_calls(tokens, MACRO_NAMES_PARAMCOUNTS)

    return "".join(macro_call_to_rst_paragraphs(mc) for mc in macro_calls)
|
|
|
|
|
|
p = argparse.ArgumentParser()
p.add_argument("--options-def", help="path to AnalyzerOptions.def")
p.add_argument("--template", help="template file")
p.add_argument("--out", help="output file")
opts = p.parse_args()

with open(opts.template, encoding="utf-8") as f:
    doc_template = f.read()

# The template marks where the generated option list must be inserted:
PLACEHOLDER = ".. OPTIONS_LIST_PLACEHOLDER\n"

# str.replace() is a silent no-op when the placeholder is absent, which
# would produce output missing the entire option list -- report it instead.
if PLACEHOLDER not in doc_template:
    err_handler.report_error(
        "placeholder %r not found in the template file" % PLACEHOLDER.strip()
    )

rst_output = doc_template.replace(PLACEHOLDER, get_option_list(opts.options_def))

err_handler.report_unused_tweaks()

# newline="" preserves the template's original line endings verbatim.
with open(opts.out, "w", newline="", encoding="utf-8") as f:
    f.write(rst_output)

# Nonzero exit status tells the build system that doc generation failed.
if err_handler.seen_errors:
    sys.exit(1)
|