Kirill Bobyrev 46a6f5ae14 [clangd] NFC: Move stdlib headers handling to Clang
This will allow moving the IncludeCleaner library essentials to Clang
and decoupling them from the majority of clangd.

The patch itself just moves the code, it doesn't change existing
functionality.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D119130
2022-02-09 11:05:39 +01:00

121 lines
4.8 KiB
Python
Executable File

#!/usr/bin/env python
#===- gen_std.py - ------------------------------------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
"""gen_std.py is a tool to generate a lookup table (from qualified names to
include headers) for C/C++ Standard Library symbols by parsing archieved HTML
files from cppreference.
The generated files are located in clang/include/Tooling/Inclusions.
Caveats and FIXMEs:
- only symbols directly in "std" namespace are added, we should also add std's
subnamespace symbols (e.g. chrono).
- symbols with multiple variants or defined in multiple headers aren't added,
e.g. std::move, std::swap
Usage:
1. Install BeautifulSoup dependency, see instruction:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
https://en.cppreference.com/w/Cppreference:Archives
3. Unzip the zip file from step 2 to directory </cppreference>, you should
get a "reference" directory in </cppreference>
4. Run the command:
// Generate C++ symbols
gen_std.py -cppreference </cppreference/reference> -language=cpp > StdSymbolMap.inc
// Generate C symbols
gen_std.py -cppreference </cppreference/reference> -language=c > CSymbolMap.inc
"""
import cppreference_parser
import argparse
import datetime
import os
import sys
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// This file was generated automatically by
// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""
def ParseArg():
parser = argparse.ArgumentParser(description='Generate StdGen file')
parser.add_argument('-cppreference', metavar='PATH',
default='',
help='path to the cppreference offline HTML directory',
required=True
)
parser.add_argument('-language',
default='cpp',
help='Generate c or cpp symbols',
required=True)
return parser.parse_args()
def main():
args = ParseArg()
if args.language == 'cpp':
page_root = os.path.join(args.cppreference, "en", "cpp")
symbol_index_root = os.path.join(page_root, "symbol_index")
parse_pages = [
(page_root, "symbol_index.html", "std::"),
# std sub-namespace symbols have separated pages.
# We don't index std literal operators (e.g.
# std::literals::chrono_literals::operator""d), these symbols can't be
# accessed by std::<symbol_name>.
# FIXME: index std::placeholders symbols, placeholders.html page is
# different (which contains one entry for _1, _2, ..., _N), we need special
# handling.
(symbol_index_root, "chrono.html", "std::chrono::"),
(symbol_index_root, "filesystem.html", "std::filesystem::"),
(symbol_index_root, "pmr.html", "std::pmr::"),
(symbol_index_root, "regex_constants.html", "std::regex_constants::"),
(symbol_index_root, "this_thread.html", "std::this_thread::"),
]
elif args.language == 'c':
page_root = os.path.join(args.cppreference, "en", "c")
symbol_index_root = page_root
parse_pages = [(page_root, "index.html", None)]
if not os.path.exists(symbol_index_root):
exit("Path %s doesn't exist!" % symbol_index_root)
symbols = cppreference_parser.GetSymbols(parse_pages)
# We don't have version information from the unzipped offline HTML files.
# so we use the modified time of the symbol_index.html as the version.
index_page_path = os.path.join(page_root, "index.html")
cppreference_modified_date = datetime.datetime.fromtimestamp(
os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))
for symbol in symbols:
if len(symbol.headers) == 1:
# SYMBOL(unqualified_name, namespace, header)
print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
symbol.headers[0]))
elif len(symbol.headers) == 0:
sys.stderr.write("No header found for symbol %s\n" % symbol.name)
else:
# FIXME: support symbols with multiple headers (e.g. std::move).
sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
symbol.name, ', '.join(symbol.headers)))
if __name__ == '__main__':
main()