[lldb] Add C source output mode to formatter_bytecode.py (#184242)

Add the ability to generate a C source file, in addition to the existing
binary output.

An example of the generated source:

```c
#ifdef __APPLE__
#define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
#else
#define FORMATTER_SECTION ".lldbformatters"
#endif

__attribute__((used, section(FORMATTER_SECTION)))
unsigned char _Account_synthetic[] =
    // version
    "\x01"
    // remaining record size
    "\x15"
    // type name size
    "\x07"
    // type name
    "Account"
    // flags
    "\x00"
    // sig_get_num_children
    "\x02"
    // program size
    "\x02"
    // program
    "\x20\x01"
    // sig_get_child_at_index
    "\x04"
    // program size
    "\x06"
    // program
    "\x02\x20\x00\x23\x11\x60"
;
```
This commit is contained in:
Dave Lee 2026-03-03 15:48:30 -08:00 committed by GitHub
parent 76568dc899
commit ece4b75932
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -17,6 +17,7 @@ if path in sys.path:
import re
import io
import textwrap
from dataclasses import dataclass
from typing import BinaryIO, TextIO, Tuple, Union
@ -207,9 +208,7 @@ class BytecodeSection:
raise ValueError(f"duplicate signature: {sig}")
seen.add(sig)
def write_binary(self, output: BinaryIO) -> None:
self.validate()
def _to_binary(self) -> bytes:
bin = bytearray()
bin.extend(_to_uleb(len(self.type_name)))
bin.extend(bytes(self.type_name, encoding="utf-8"))
@ -219,9 +218,77 @@ class BytecodeSection:
bin.extend(_to_uleb(len(bc)))
bin.extend(bc)
return bytes(bin)
def write_binary(self, output: BinaryIO) -> None:
self.validate()
bin = self._to_binary()
output.write(_to_byte(BINARY_VERSION))
output.write(_to_uleb(len(bin)))
output.write(bin)
output.write(self._to_binary())
class _CBuilder:
    """Helper class for emitting binary data as a C-string literal.

    The builder accumulates ``(literal, comment)`` pairs in ``entries``.
    Each ``literal`` is a complete, double-quoted C string literal and
    ``comment`` is the human-readable label supplied by the caller.
    """

    entries: list[Tuple[str, str]]

    def __init__(self) -> None:
        self.entries = []

    def add_byte(self, x: int, comment: str) -> None:
        """Append the integer *x*, encoded with ``_to_byte``, as hex escapes."""
        self.add_bytes(_to_byte(x), comment)

    def add_uleb(self, x: int, comment: str) -> None:
        """Append the integer *x*, encoded with ``_to_uleb``, as hex escapes."""
        self.add_bytes(_to_uleb(x), comment)

    def add_bytes(self, x: bytes, comment: str) -> None:
        """Append raw bytes as a literal made of ``\\xNN`` escapes."""
        # Construct zero padded hex values with length two.
        escaped = "".join(f"\\x{b:02x}" for b in x)
        # Already valid C escape syntax: bypass add_string so the
        # backslashes are not escaped a second time.
        self._append_literal(escaped, comment)

    def add_string(self, string: str, comment: str) -> None:
        """Append *string* as readable text, escaped for a C string literal."""
        self._append_literal(self._escape(string), comment)

    def _append_literal(self, body: str, comment: str) -> None:
        """Wrap an already-escaped literal body in quotes and record it."""
        self.entries.append((f'"{body}"', comment))

    @staticmethod
    def _escape(text: str) -> str:
        """Escape characters that would break or alter a C string literal.

        Double quotes and backslashes get a backslash prefix. ASCII control
        characters become three-digit octal escapes, which have a bounded
        length and therefore cannot absorb a following character the way
        ``\\x`` escapes can. All other characters pass through unchanged.
        """
        pieces = []
        for ch in text:
            if ch in ('"', "\\"):
                pieces.append("\\" + ch)
            elif ch < " " or ch == "\x7f":
                pieces.append(f"\\{ord(ch):03o}")
            else:
                pieces.append(ch)
        return "".join(pieces)
def write_source(self, output: TextIO) -> None:
    """Write this section to *output* as C source text.

    The emitted text defines the ``FORMATTER_SECTION`` macro and then a
    single ``unsigned char`` array, placed in that section, whose
    initializer is a sequence of adjacent string literals. Each literal is
    preceded by a ``//`` comment naming the field it encodes.

    ``self.validate()`` is called first; any exception it raises
    propagates to the caller.
    """
    self.validate()
    record_size = len(self._to_binary())

    builder = self._CBuilder()
    builder.add_byte(BINARY_VERSION, "version")
    builder.add_uleb(record_size, "remaining record size")
    builder.add_uleb(len(self.type_name), "type name size")
    builder.add_string(self.type_name, "type name")
    builder.add_byte(self.flags, "flags")
    for sig, bc in self.signatures:
        builder.add_byte(SIGNATURES[sig], f"sig_{sig}")
        builder.add_uleb(len(bc), "program size")
        builder.add_bytes(bc, "program")

    # Spelled out line by line so the emitted text does not depend on how
    # this method happens to be indented. The empty first and last items
    # produce the leading blank line and, together with print()'s own
    # newline, the blank line after #endif.
    preamble = "\n".join(
        (
            "",
            "#ifdef __APPLE__",
            '#define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"',
            "#else",
            '#define FORMATTER_SECTION ".lldbformatters"',
            "#endif",
            "",
        )
    )
    print(preamble, file=output)

    # Replace every non-word character so the type name can be embedded
    # in a C identifier (e.g. "std::vector<int>" -> "std__vector_int_").
    var_name = re.sub(r"\W", "_", self.type_name)
    print(
        "__attribute__((used, section(FORMATTER_SECTION)))",
        file=output,
    )
    print(f"unsigned char _{var_name}_synthetic[] =", file=output)

    # Four-space indent for each comment/literal pair of the initializer.
    indent = "    "
    for literal, comment in builder.entries:
        print(f"{indent}// {comment}", file=output)
        print(f"{indent}{literal}", file=output)
    print(";", file=output)
def compile_file(type_name: str, input: TextIO) -> BytecodeSection:
@ -601,7 +668,7 @@ def interpret(bytecode: bytes, control: list, data: list, tracing: bool = False)
################################################################################
def _to_uleb(value: int) -> bytearray:
def _to_uleb(value: int) -> bytes:
"""Encode an integer to ULEB128 bytes."""
if value < 0:
raise ValueError(f"negative number cannot be encoded to ULEB128: {value}")
@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray:
if value == 0:
break
return result
return bytes(result)
def _from_uleb(stream: BinaryIO) -> int:
@ -665,18 +732,27 @@ def _main():
"--output",
help="output file (required for --compile)",
)
parser.add_argument(
"-f",
"--format",
choices=("binary", "c"),
default="binary",
help="output file format",
)
parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
args = parser.parse_args()
if args.compile:
if not args.output:
parser.error("--output is required with --compile")
with (
open(args.input) as input,
open(args.output, "wb") as output,
):
with open(args.input) as input:
section = compile_file(args.type_name, input)
section.write_binary(output)
if args.format == "binary":
with open(args.output, "wb") as output:
section.write_binary(output)
else: # args.format == "c"
with open(args.output, "w") as output:
section.write_source(output)
elif args.disassemble:
if args.output:
with (
@ -758,4 +834,42 @@ if __name__ == "__main__":
with self.assertRaises(ValueError):
run_compile("MyType", "@summary: 1u return\n@summary: 2u return")
def test_write_source(self):
    """Check the C source emitted for a section with known byte values."""
    # The Account example from main.cpp serves as the reference because
    # its exact byte values are known.
    account = BytecodeSection(
        type_name="Account",
        flags=0,
        signatures=[
            ("get_num_children", bytes([0x20, 0x01])),
            ("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])),
        ],
    )
    buffer = io.StringIO()
    account.write_source(buffer)
    generated = buffer.getvalue()

    expected_fragments = (
        "__attribute__((used, section(FORMATTER_SECTION)))",
        "unsigned char _Account_synthetic[] =",
        r'"\x01"',  # version
        r'"\x15"',  # record size (21)
        r'"\x07"',  # type name size (7)
        '"Account"',  # type name
        r'"\x00"',  # flags
        r'"\x02"',  # sig_get_num_children
        r'"\x20\x01"',  # program
        r'"\x04"',  # sig_get_child_at_index
        r'"\x06"',  # program size
        r'"\x02\x20\x00\x23\x11\x60"',  # program
        "// version",
        "// type name",
        "// program",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, generated)

    # Exactly one semicolon: the one that ends the array initializer.
    self.assertEqual(generated.count(";"), 1)

    # Characters that cannot appear in an identifier become '_'.
    templated = io.StringIO()
    BytecodeSection("std::vector<int>", 0, []).write_source(templated)
    self.assertIn("_std__vector_int__synthetic[] =", templated.getvalue())
unittest.main(argv=[__file__])