From d434d82010be4febba9bc2c208fef2196e5922bb Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 18 Mar 2026 17:42:36 -0700 Subject: [PATCH] [MLGO] Modernize type annotations in mlgo-utils (#187408) Use the 3.10+ syntax while supporting older versions using an import from __future__. --- .../mlgo-utils/mlgo/corpus/extract_ir_lib.py | 50 ++++++++++--------- .../mlgo-utils/mlgo/corpus/make_corpus_lib.py | 14 +++--- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py index 386dede34ad1..7199707f3c1e 100644 --- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py +++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py @@ -3,6 +3,10 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """Library functions for IR extraction.""" +# TODO(boomanaiden154): Remove this import once we have upgraded to Python 3.10 +# which supports the relevant type annotations by default. +from __future__ import annotations + import os import pathlib import re @@ -13,14 +17,12 @@ import functools import json import logging -from typing import Dict, List, Optional - _UNSPECIFIED_OVERRIDE = [""] # TODO(ml-compiler-opt): maybe we can also convert here the cmdline file,from a # \0 - separated list of strings, to a \n one. 
-def should_include_module(cmdline: str, match_regexp: Optional[str]) -> bool: +def should_include_module(cmdline: str, match_regexp: str | None) -> bool: """Determine if the module should be included.""" if match_regexp is None: return True @@ -28,7 +30,7 @@ def should_include_module(cmdline: str, match_regexp: Optional[str]) -> bool: return any(len(re.findall(match_regexp, l)) for l in lines) -def get_thinlto_index(cmdline: str, basedir: str) -> Optional[str]: +def get_thinlto_index(cmdline: str, basedir: str) -> str | None: opts = cmdline.split("\0") for option in opts: if option.startswith("-fthinlto-index"): @@ -122,11 +124,11 @@ class TrainingIRExtractor: def _extract_clang_artifacts( self, llvm_objcopy_path: str, - cmd_filter: Optional[str], + cmd_filter: str | None, is_thinlto: bool, cmd_section_name: str, bitcode_section_name: str, - ) -> Optional[str]: + ) -> str | None: """Run llvm-objcopy to extract the .bc and command line.""" if not os.path.exists(self.input_obj()): logging.info("%s does not exist.", self.input_obj()) @@ -173,7 +175,7 @@ class TrainingIRExtractor: ) return self.relative_output_path() - def _extract_lld_artifacts(self) -> Optional[str]: + def _extract_lld_artifacts(self) -> str | None: """Extract the .bc file with ThinLTO index from an lld ThinLTO invocation.""" if not os.path.exists(self.lld_src_bc()): logging.info("%s does not exist.", self.lld_src_bc()) @@ -193,12 +195,12 @@ class TrainingIRExtractor: def extract( self, - llvm_objcopy_path: Optional[str] = None, - cmd_filter: Optional[str] = None, - thinlto_build: Optional[str] = None, - cmd_section_name: Optional[str] = ".llvmcmd", - bitcode_section_name: Optional[str] = ".llvmbc", - ) -> Optional[str]: + llvm_objcopy_path: str | None = None, + cmd_filter: str | None = None, + thinlto_build: str | None = None, + cmd_section_name: str | None = ".llvmcmd", + bitcode_section_name: str | None = ".llvmbc", + ) -> str | None: if thinlto_build == "local": return 
self._extract_lld_artifacts() return self._extract_clang_artifacts( @@ -211,8 +213,8 @@ class TrainingIRExtractor: def convert_compile_command_to_objectfile( - command: Dict[str, str], output_dir: str -) -> Optional[TrainingIRExtractor]: + command: dict[str, str], output_dir: str +) -> TrainingIRExtractor | None: obj_base_dir = command["directory"] if "arguments" in command: cmd_parts = command["arguments"] @@ -238,8 +240,8 @@ def convert_compile_command_to_objectfile( def load_from_compile_commands( - json_array: List[Dict[str, str]], output_dir: str -) -> List[TrainingIRExtractor]: + json_array: list[dict[str, str]], output_dir: str +) -> list[TrainingIRExtractor]: objs = [ convert_compile_command_to_objectfile(cmd, output_dir) for cmd in json_array ] @@ -248,8 +250,8 @@ def load_from_compile_commands( def load_from_lld_params( - params_array: List[str], obj_base_dir: str, output_dir: str -) -> List[TrainingIRExtractor]: + params_array: list[str], obj_base_dir: str, output_dir: str +) -> list[TrainingIRExtractor]: """Create an ObjectFile array based on lld's parameters.""" # yank out -o and the output. After that, anything not starting with '-', and # ending in a '.o', is an object file. @@ -275,7 +277,7 @@ def load_from_lld_params( def load_from_directory( obj_base_dir: str, output_dir: str -) -> List[TrainingIRExtractor]: +) -> list[TrainingIRExtractor]: """Create an object file array by globbing an entire drectory. Args: @@ -298,7 +300,7 @@ def load_from_directory( def load_for_lld_thinlto( obj_base_dir: str, output_dir: str -) -> List[TrainingIRExtractor]: +) -> list[TrainingIRExtractor]: # .3.import.bc is the suffix attached to post-merge-pre-opt ('postimport') # IR bitcode saved by lld. It is hardcoded into lld. 
ThinLTO index files # are also emitted next to the postimport bitcode, with the suffix @@ -340,10 +342,10 @@ def load_bazel_aquery(aquery_json, obj_base_dir: str, output_dir: str): def run_extraction( - objs: List[TrainingIRExtractor], + objs: list[TrainingIRExtractor], num_workers: int, llvm_objcopy_path: str, - cmd_filter: Optional[str], + cmd_filter: str | None, thinlto_build: str, cmd_section_name: str, bitcode_section_name: str, @@ -383,7 +385,7 @@ def run_extraction( def write_corpus_manifest( - thinlto_build: str, relative_output_paths: List[str], output_dir: str + thinlto_build: str, relative_output_paths: list[str], output_dir: str ): """Writes a corpus_manifest.json containing all necessary information about the corpus. diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus_lib.py b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus_lib.py index 697c97ebf6ee..478596f25282 100644 --- a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus_lib.py +++ b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus_lib.py @@ -3,17 +3,19 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """Library functions for making a corpus from arbitrary bitcode.""" +# TODO(boomanaiden154): Remove this import once we have upgraded to Python 3.10 +# which supports the relevant type annotations by default. +from __future__ import annotations + import pathlib import os import shutil import json -from typing import List, Optional - BITCODE_EXTENSION = ".bc" -def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]: +def load_bitcode_from_directory(bitcode_base_dir: str) -> list[str]: """Finds bitcode files to extract from a given directory. Args: @@ -33,7 +35,7 @@ def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]: def copy_bitcode( - relative_paths: List[str], bitcode_base_dir: str, output_dir: str + relative_paths: list[str], bitcode_base_dir: str, output_dir: str ) -> None: """Copies bitcode files from the base directory to the output directory. 
@@ -51,9 +53,9 @@ def copy_bitcode( def write_corpus_manifest( - relative_output_paths: List[str], + relative_output_paths: list[str], output_dir: str, - default_args: Optional[List[str]] = None, + default_args: list[str] | None = None, ) -> None: """Creates a corpus manifest describing the bitcode that has been found.