[lldb] Add C source output mode to formatter_bytecode.py (#184242)
Add the ability to generate a C source file, which is in addition to the
existing functionality of generating binary.
An example of the generated source:
```c
#ifdef __APPLE__
#define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
#else
#define FORMATTER_SECTION ".lldbformatters"
#endif
__attribute__((used, section(FORMATTER_SECTION)))
unsigned char _Account_synthetic[] =
// version
"\x01"
// remaining record size
"\x15"
// type name size
"\x07"
// type name
"Account"
// flags
"\x00"
// sig_get_num_children
"\x02"
// program size
"\x02"
// program
"\x20\x01"
// sig_get_child_at_index
"\x04"
// program size
"\x06"
// program
"\x02\x20\x00\x23\x11\x60"
;
```
This commit is contained in:
parent
76568dc899
commit
ece4b75932
@ -17,6 +17,7 @@ if path in sys.path:
|
||||
|
||||
import re
|
||||
import io
|
||||
import textwrap
|
||||
from dataclasses import dataclass
|
||||
from typing import BinaryIO, TextIO, Tuple, Union
|
||||
|
||||
@ -207,9 +208,7 @@ class BytecodeSection:
|
||||
raise ValueError(f"duplicate signature: {sig}")
|
||||
seen.add(sig)
|
||||
|
||||
def write_binary(self, output: BinaryIO) -> None:
|
||||
self.validate()
|
||||
|
||||
def _to_binary(self) -> bytes:
|
||||
bin = bytearray()
|
||||
bin.extend(_to_uleb(len(self.type_name)))
|
||||
bin.extend(bytes(self.type_name, encoding="utf-8"))
|
||||
@ -219,9 +218,77 @@ class BytecodeSection:
|
||||
bin.extend(_to_uleb(len(bc)))
|
||||
bin.extend(bc)
|
||||
|
||||
return bytes(bin)
|
||||
|
||||
def write_binary(self, output: BinaryIO) -> None:
|
||||
self.validate()
|
||||
|
||||
bin = self._to_binary()
|
||||
output.write(_to_byte(BINARY_VERSION))
|
||||
output.write(_to_uleb(len(bin)))
|
||||
output.write(bin)
|
||||
output.write(self._to_binary())
|
||||
|
||||
class _CBuilder:
|
||||
"""Helper class for emitting binary data as a C-string literal."""
|
||||
|
||||
entries: list[Tuple[str, str]]
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.entries = []
|
||||
|
||||
def add_byte(self, x: int, comment: str) -> None:
|
||||
self.add_bytes(_to_byte(x), comment)
|
||||
|
||||
def add_uleb(self, x: int, comment: str) -> None:
|
||||
self.add_bytes(_to_uleb(x), comment)
|
||||
|
||||
def add_bytes(self, x: bytes, comment: str) -> None:
|
||||
# Construct zero padded hex values with length two.
|
||||
string = "".join(f"\\x{b:02x}" for b in x)
|
||||
self.add_string(string, comment)
|
||||
|
||||
def add_string(self, string: str, comment: str) -> None:
|
||||
self.entries.append((f'"{string}"', comment))
|
||||
|
||||
def write_source(self, output: TextIO) -> None:
|
||||
self.validate()
|
||||
|
||||
size = len(self._to_binary())
|
||||
|
||||
b = self._CBuilder()
|
||||
b.add_byte(BINARY_VERSION, "version")
|
||||
b.add_uleb(size, "remaining record size")
|
||||
b.add_uleb(len(self.type_name), "type name size")
|
||||
b.add_string(self.type_name, "type name")
|
||||
b.add_byte(self.flags, "flags")
|
||||
for sig, bc in self.signatures:
|
||||
b.add_byte(SIGNATURES[sig], f"sig_{sig}")
|
||||
b.add_uleb(len(bc), "program size")
|
||||
b.add_bytes(bc, "program")
|
||||
|
||||
print(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
#ifdef __APPLE__
|
||||
#define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
|
||||
#else
|
||||
#define FORMATTER_SECTION ".lldbformatters"
|
||||
#endif
|
||||
"""
|
||||
),
|
||||
file=output,
|
||||
)
|
||||
var_name = re.sub(r"\W", "_", self.type_name)
|
||||
print(
|
||||
"__attribute__((used, section(FORMATTER_SECTION)))",
|
||||
file=output,
|
||||
)
|
||||
print(f"unsigned char _{var_name}_synthetic[] =", file=output)
|
||||
indent = " "
|
||||
for string, comment in b.entries:
|
||||
print(f"{indent}// {comment}", file=output)
|
||||
print(f"{indent}{string}", file=output)
|
||||
print(";", file=output)
|
||||
|
||||
|
||||
def compile_file(type_name: str, input: TextIO) -> BytecodeSection:
|
||||
@ -601,7 +668,7 @@ def interpret(bytecode: bytes, control: list, data: list, tracing: bool = False)
|
||||
################################################################################
|
||||
|
||||
|
||||
def _to_uleb(value: int) -> bytearray:
|
||||
def _to_uleb(value: int) -> bytes:
|
||||
"""Encode an integer to ULEB128 bytes."""
|
||||
if value < 0:
|
||||
raise ValueError(f"negative number cannot be encoded to ULEB128: {value}")
|
||||
@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray:
|
||||
if value == 0:
|
||||
break
|
||||
|
||||
return result
|
||||
return bytes(result)
|
||||
|
||||
|
||||
def _from_uleb(stream: BinaryIO) -> int:
|
||||
@ -665,18 +732,27 @@ def _main():
|
||||
"--output",
|
||||
help="output file (required for --compile)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--format",
|
||||
choices=("binary", "c"),
|
||||
default="binary",
|
||||
help="output file format",
|
||||
)
|
||||
parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
|
||||
|
||||
args = parser.parse_args()
|
||||
if args.compile:
|
||||
if not args.output:
|
||||
parser.error("--output is required with --compile")
|
||||
with (
|
||||
open(args.input) as input,
|
||||
open(args.output, "wb") as output,
|
||||
):
|
||||
with open(args.input) as input:
|
||||
section = compile_file(args.type_name, input)
|
||||
section.write_binary(output)
|
||||
if args.format == "binary":
|
||||
with open(args.output, "wb") as output:
|
||||
section.write_binary(output)
|
||||
else: # args.format == "c"
|
||||
with open(args.output, "w") as output:
|
||||
section.write_source(output)
|
||||
elif args.disassemble:
|
||||
if args.output:
|
||||
with (
|
||||
@ -758,4 +834,42 @@ if __name__ == "__main__":
|
||||
with self.assertRaises(ValueError):
|
||||
run_compile("MyType", "@summary: 1u return\n@summary: 2u return")
|
||||
|
||||
def test_write_source(self):
|
||||
# Use the Account example from main.cpp as a reference, whose
|
||||
# exact byte values are known.
|
||||
section = BytecodeSection(
|
||||
type_name="Account",
|
||||
flags=0,
|
||||
signatures=[
|
||||
("get_num_children", bytes([0x20, 0x01])),
|
||||
("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])),
|
||||
],
|
||||
)
|
||||
out = io.StringIO()
|
||||
section.write_source(out)
|
||||
src = out.getvalue()
|
||||
|
||||
self.assertIn("__attribute__((used, section(FORMATTER_SECTION)))", src)
|
||||
self.assertIn("unsigned char _Account_synthetic[] =", src)
|
||||
self.assertIn('"\\x01"', src) # version
|
||||
self.assertIn('"\\x15"', src) # record size (21)
|
||||
self.assertIn('"\\x07"', src) # type name size (7)
|
||||
self.assertIn('"Account"', src) # type name
|
||||
self.assertIn('"\\x00"', src) # flags
|
||||
self.assertIn('"\\x02"', src) # sig_get_num_children
|
||||
self.assertIn('"\\x20\\x01"', src) # program
|
||||
self.assertIn('"\\x04"', src) # sig_get_child_at_index
|
||||
self.assertIn('"\\x06"', src) # program size
|
||||
self.assertIn('"\\x02\\x20\\x00\\x23\\x11\\x60"', src) # program
|
||||
self.assertIn("// version", src)
|
||||
self.assertIn("// type name", src)
|
||||
self.assertIn("// program", src)
|
||||
# Semicolon terminates the array initializer.
|
||||
self.assertEqual(src.count(";"), 1)
|
||||
|
||||
# Non-identifier characters in the type name are replaced with '_'.
|
||||
out2 = io.StringIO()
|
||||
BytecodeSection("std::vector<int>", 0, []).write_source(out2)
|
||||
self.assertIn("_std__vector_int__synthetic[] =", out2.getvalue())
|
||||
|
||||
unittest.main(argv=[__file__])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user