[MLGO] Modernize type annotations in mlgo-utils (#187408)

Use the 3.10+ syntax while supporting older versions using an import from
__future__.
This commit is contained in:
Aiden Grossman 2026-03-18 17:42:36 -07:00 committed by GitHub
parent 39b6a4d84a
commit d434d82010
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 34 additions and 30 deletions

View File

@ -3,6 +3,10 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Library functions for IR extraction."""
# TODO(boomanaiden154): Remove this import once we have upgraded to Python 3.10,
# which supports the relevant type annotations by default.
from __future__ import annotations
import os
import pathlib
import re
@ -13,14 +17,12 @@ import functools
import json
import logging
from typing import Dict, List, Optional
_UNSPECIFIED_OVERRIDE = ["<UNSPECIFIED>"]
# TODO(ml-compiler-opt): maybe we can also convert the cmdline file here, from a
# \0-separated list of strings to a \n-separated one.
def should_include_module(cmdline: str, match_regexp: Optional[str]) -> bool:
def should_include_module(cmdline: str, match_regexp: str | None) -> bool:
"""Determine if the module should be included."""
if match_regexp is None:
return True
@ -28,7 +30,7 @@ def should_include_module(cmdline: str, match_regexp: Optional[str]) -> bool:
return any(len(re.findall(match_regexp, l)) for l in lines)
def get_thinlto_index(cmdline: str, basedir: str) -> Optional[str]:
def get_thinlto_index(cmdline: str, basedir: str) -> str | None:
opts = cmdline.split("\0")
for option in opts:
if option.startswith("-fthinlto-index"):
@ -122,11 +124,11 @@ class TrainingIRExtractor:
def _extract_clang_artifacts(
self,
llvm_objcopy_path: str,
cmd_filter: Optional[str],
cmd_filter: str | None,
is_thinlto: bool,
cmd_section_name: str,
bitcode_section_name: str,
) -> Optional[str]:
) -> str | None:
"""Run llvm-objcopy to extract the .bc and command line."""
if not os.path.exists(self.input_obj()):
logging.info("%s does not exist.", self.input_obj())
@ -173,7 +175,7 @@ class TrainingIRExtractor:
)
return self.relative_output_path()
def _extract_lld_artifacts(self) -> Optional[str]:
def _extract_lld_artifacts(self) -> str | None:
"""Extract the .bc file with ThinLTO index from an lld ThinLTO invocation."""
if not os.path.exists(self.lld_src_bc()):
logging.info("%s does not exist.", self.lld_src_bc())
@ -193,12 +195,12 @@ class TrainingIRExtractor:
def extract(
self,
llvm_objcopy_path: Optional[str] = None,
cmd_filter: Optional[str] = None,
thinlto_build: Optional[str] = None,
cmd_section_name: Optional[str] = ".llvmcmd",
bitcode_section_name: Optional[str] = ".llvmbc",
) -> Optional[str]:
llvm_objcopy_path: str | None = None,
cmd_filter: str | None = None,
thinlto_build: str | None = None,
cmd_section_name: str | None = ".llvmcmd",
bitcode_section_name: str | None = ".llvmbc",
) -> str | None:
if thinlto_build == "local":
return self._extract_lld_artifacts()
return self._extract_clang_artifacts(
@ -211,8 +213,8 @@ class TrainingIRExtractor:
def convert_compile_command_to_objectfile(
command: Dict[str, str], output_dir: str
) -> Optional[TrainingIRExtractor]:
command: dict[str, str], output_dir: str
) -> TrainingIRExtractor | None:
obj_base_dir = command["directory"]
if "arguments" in command:
cmd_parts = command["arguments"]
@ -238,8 +240,8 @@ def convert_compile_command_to_objectfile(
def load_from_compile_commands(
json_array: List[Dict[str, str]], output_dir: str
) -> List[TrainingIRExtractor]:
json_array: list[dict[str, str]], output_dir: str
) -> list[TrainingIRExtractor]:
objs = [
convert_compile_command_to_objectfile(cmd, output_dir) for cmd in json_array
]
@ -248,8 +250,8 @@ def load_from_compile_commands(
def load_from_lld_params(
params_array: List[str], obj_base_dir: str, output_dir: str
) -> List[TrainingIRExtractor]:
params_array: list[str], obj_base_dir: str, output_dir: str
) -> list[TrainingIRExtractor]:
"""Create an ObjectFile array based on lld's parameters."""
# yank out -o and the output. After that, anything not starting with '-', and
# ending in a '.o', is an object file.
@ -275,7 +277,7 @@ def load_from_lld_params(
def load_from_directory(
obj_base_dir: str, output_dir: str
) -> List[TrainingIRExtractor]:
) -> list[TrainingIRExtractor]:
"""Create an object file array by globbing an entire directory.
Args:
@ -298,7 +300,7 @@ def load_from_directory(
def load_for_lld_thinlto(
obj_base_dir: str, output_dir: str
) -> List[TrainingIRExtractor]:
) -> list[TrainingIRExtractor]:
# .3.import.bc is the suffix attached to post-merge-pre-opt ('postimport')
# IR bitcode saved by lld. It is hardcoded into lld. ThinLTO index files
# are also emitted next to the postimport bitcode, with the suffix
@ -340,10 +342,10 @@ def load_bazel_aquery(aquery_json, obj_base_dir: str, output_dir: str):
def run_extraction(
objs: List[TrainingIRExtractor],
objs: list[TrainingIRExtractor],
num_workers: int,
llvm_objcopy_path: str,
cmd_filter: Optional[str],
cmd_filter: str | None,
thinlto_build: str,
cmd_section_name: str,
bitcode_section_name: str,
@ -383,7 +385,7 @@ def run_extraction(
def write_corpus_manifest(
thinlto_build: str, relative_output_paths: List[str], output_dir: str
thinlto_build: str, relative_output_paths: list[str], output_dir: str
):
"""Writes a corpus_manifest.json containing all necessary information about
the corpus.

View File

@ -3,17 +3,19 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Library functions for making a corpus from arbitrary bitcode."""
# TODO(boomanaiden154): Remove this import once we have upgraded to Python 3.10,
# which supports the relevant type annotations by default.
from __future__ import annotations
import pathlib
import os
import shutil
import json
from typing import List, Optional
BITCODE_EXTENSION = ".bc"
def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]:
def load_bitcode_from_directory(bitcode_base_dir: str) -> list[str]:
"""Finds bitcode files to extract from a given directory.
Args:
@ -33,7 +35,7 @@ def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]:
def copy_bitcode(
relative_paths: List[str], bitcode_base_dir: str, output_dir: str
relative_paths: list[str], bitcode_base_dir: str, output_dir: str
) -> None:
"""Copies bitcode files from the base directory to the output directory.
@ -51,9 +53,9 @@ def copy_bitcode(
def write_corpus_manifest(
relative_output_paths: List[str],
relative_output_paths: list[str],
output_dir: str,
default_args: Optional[List[str]] = None,
default_args: list[str] | None = None,
) -> None:
"""Creates a corpus manifest describing the bitcode that has been found.