#!/usr/bin/env python3 # A tool to automatically generate documentation for the config options of the # clang static analyzer by reading `AnalyzerOptions.def`. import argparse from collections import namedtuple from enum import Enum, auto import re import sys import textwrap # The following code implements a trivial parser for the narrow subset of C++ # which is used in AnalyzerOptions.def. This supports the following features: # - ignores preprocessor directives, even if they are continued with \ at EOL # - ignores comments: both /* ... */ and // ... # - parses string literals (even if they contain \" escapes) # - concatenates adjacent string literals # - parses numbers even if they contain ' as a thousands separator # - recognizes MACRO(arg1, arg2, ..., argN) calls class TT(Enum): "Token type enum." number = auto() ident = auto() string = auto() punct = auto() TOKENS = [ (re.compile(r"-?[0-9']+"), TT.number), (re.compile(r"\w+"), TT.ident), (re.compile(r'"([^\\"]|\\.)*"'), TT.string), (re.compile(r"[(),]"), TT.punct), (re.compile(r"/\*((?!\*/).)*\*/", re.S), None), # C-style comment (re.compile(r"//.*\n"), None), # C++ style oneline comment (re.compile(r"#.*(\\\n.*)*(?", which is # OK for a terse command line printout, but should be prettified for web # documentation. # Moreover, the option ctu-invocation-list shows some example file content # which is formatted as a preformatted block. paragraphs = [desc] extra = "" if m := re.search(r"(^|\s)Value:", desc): err_handler.record_use_of_tweak("accepted values") paragraphs = [desc[: m.start()], "Accepted values:" + desc[m.end() :]] elif m := re.search(r"\s*Example file.content:", desc): err_handler.record_use_of_tweak("example file content") paragraphs = [desc[: m.start()]] extra = "Example file content::\n\n " + desc[m.end() :] + "\n\n" wrapped = [textwrap.fill(p, width=80) for p in paragraphs if p.strip()] return "\n\n".join(wrapped + [""]) + extra def default_to_rst(tok): if tok.kind == TT.string: if tok.code == '""': return "(empty string)" return tok.code if tok.kind == TT.ident: return tok.code if tok.kind == TT.number: return tok.code.replace("'", "") raise ValueError(f"unexpected token as default value: {tok.kind.name}") def defaults_to_rst_paragraph(defaults): strs = [default_to_rst(d) for d in defaults] if len(strs) == 1: return f"Default value: {strs[0]}\n\n" if len(strs) == 2: return ( f"Default value: {strs[0]} (in shallow mode) / {strs[1]} (in deep mode)\n\n" ) raise ValueError("unexpected count of default values: %d" % len(defaults)) def macro_call_to_rst_paragraphs(macro_call): try: arg_count = len(macro_call.args) param_count = MACRO_NAMES_PARAMCOUNTS[macro_call.name] if arg_count != param_count: raise ValueError( f"expected {param_count} arguments for {macro_call.name}, found {arg_count}" ) _, _, cmdflag, desc, *defaults = macro_call.args return ( cmdflag_to_rst_title(cmdflag) + desc_to_rst_paragraphs(desc) + defaults_to_rst_paragraph(defaults) ) except ValueError as ve: err_handler.report_error(ve.args[0]) return "" def get_option_list(input_file): with open(input_file, encoding="utf-8") as f: contents = f.read() tokens = join_strings(tokenize(contents)) macro_calls = get_calls(tokens, MACRO_NAMES_PARAMCOUNTS) result = "" for mc in macro_calls: result += macro_call_to_rst_paragraphs(mc) return result p = argparse.ArgumentParser() p.add_argument("--options-def", help="path to AnalyzerOptions.def") p.add_argument("--template", help="template file") p.add_argument("--out", help="output file") opts = p.parse_args() with open(opts.template, encoding="utf-8") as f: doc_template = f.read() PLACEHOLDER = ".. OPTIONS_LIST_PLACEHOLDER\n" rst_output = doc_template.replace(PLACEHOLDER, get_option_list(opts.options_def)) err_handler.report_unused_tweaks() with open(opts.out, "w", newline="", encoding="utf-8") as f: f.write(rst_output) if err_handler.seen_errors: sys.exit(1)