
This will allow moving the IncludeCleaner library essentials to Clang and decoupling them from the majority of clangd. The patch itself just moves the code, it doesn't change existing functionality. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D119130
121 lines
4.8 KiB
Python
Executable File
121 lines
4.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#===- gen_std.py - ------------------------------------------*- python -*--===#
|
|
#
|
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
# See https://llvm.org/LICENSE.txt for license information.
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
#
|
|
#===------------------------------------------------------------------------===#
|
|
|
|
"""gen_std.py is a tool to generate a lookup table (from qualified names to
|
|
include headers) for C/C++ Standard Library symbols by parsing archived HTML
|
|
files from cppreference.
|
|
|
|
The generated files are located in clang/include/Tooling/Inclusions.
|
|
|
|
Caveats and FIXMEs:
|
|
- only symbols directly in "std" namespace are added, we should also add std's
|
|
subnamespace symbols (e.g. chrono).
|
|
- symbols with multiple variants or defined in multiple headers aren't added,
|
|
e.g. std::move, std::swap
|
|
|
|
Usage:
|
|
1. Install BeautifulSoup dependency, see instruction:
|
|
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
|
|
2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
|
|
https://en.cppreference.com/w/Cppreference:Archives
|
|
3. Unzip the zip file from step 2 to directory </cppreference>, you should
|
|
get a "reference" directory in </cppreference>
|
|
4. Run the command:
|
|
// Generate C++ symbols
|
|
gen_std.py -cppreference </cppreference/reference> -language=cpp > StdSymbolMap.inc
|
|
// Generate C symbols
|
|
gen_std.py -cppreference </cppreference/reference> -language=c > CSymbolMap.inc
|
|
"""
|
|
|
|
|
|
import cppreference_parser
|
|
import argparse
|
|
import datetime
|
|
import os
|
|
import sys
|
|
|
|
# Banner printed at the top of the generated .inc file. It is a %-format
# template with two %s placeholders which main() fills, in order, with the
# upper-cased language name and the modification date (YYYY-MM-DD) of the
# cppreference index page.
# NOTE(review): the lone '|' lines inside this literal look like extraction
# residue rather than intended output -- confirm against the upstream file.
CODE_PREFIX = """\
|
|
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
|
|
//
|
|
// Used to build a lookup table (qualified names => include headers) for %s
|
|
// Standard Library symbols.
|
|
//
|
|
// This file was generated automatically by
|
|
// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
|
|
//
|
|
// Generated from cppreference offline HTML book (modified on %s).
|
|
//===----------------------------------------------------------------------===//
|
|
"""
|
|
|
|
def ParseArg():
    """Parse the command-line arguments of gen_std.py.

    Both options are mandatory:
      -cppreference PATH  path to the cppreference offline HTML directory.
      -language {c,cpp}   which symbol table to generate.

    Returns:
      The argparse.Namespace with the `cppreference` and `language` attributes.

    Raises:
      SystemExit: raised by argparse when an option is missing or when
        -language is not one of the supported values.
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    # No `default=` here: the option is required, so a default could never be
    # used and would only suggest that the flag is optional.
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    # Restrict the accepted values up front so that an unsupported language is
    # reported as a usage error instead of failing later in main().
    parser.add_argument('-language',
                        choices=('c', 'cpp'),
                        help='Generate c or cpp symbols',
                        required=True)
    return parser.parse_args()
|
|
|
|
|
|
def main():
    """Generate the symbol lookup table and print it to stdout.

    Reads the command-line arguments via ParseArg(), selects the cppreference
    index pages to parse for the requested language, and hands them to
    cppreference_parser.GetSymbols(). For every returned symbol that has
    exactly one header, a `SYMBOL(name, namespace, header)` line is printed to
    stdout after the CODE_PREFIX banner. Symbols with no header or with
    several headers are reported on stderr and skipped.

    Raises:
      SystemExit: if the language is not 'c' or 'cpp', or if the expected
        symbol index directory does not exist.
    """
    args = ParseArg()
    if args.language == 'cpp':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols
            # can't be accessed by std::<symbol_name>.
            # FIXME: index std::placeholders symbols, placeholders.html page
            # is different (which contains one entry for _1, _2, ..., _N), we
            # need special handling.
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "regex_constants.html",
             "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
        ]
    elif args.language == 'c':
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]
    else:
        # Without this branch the names below would be unbound and the script
        # would die with an UnboundLocalError instead of a readable message.
        sys.exit("Unsupported language '%s', expected 'c' or 'cpp'"
                 % args.language)

    if not os.path.exists(symbol_index_root):
        # sys.exit() rather than the site-provided exit(), which is meant for
        # the interactive interpreter and is not guaranteed to exist.
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modification time of index.html under the page root as the
    # version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
    print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))

    for symbol in symbols:
        header_count = len(symbol.headers)
        if header_count == 1:
            # SYMBOL(unqualified_name, namespace, header)
            print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                          symbol.headers[0]))
        elif header_count == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
                symbol.name, ', '.join(symbol.headers)))
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the generator only when executed as a script, not when imported.
    main()
|