[analyzer][tests] Measure peak memory consumption for every project

Differential Revision: https://reviews.llvm.org/D82967
2020-07-10 10:54:18 +03:00 · 2020-07-10 10:54:18 +03:00 · 21bacc2154
commit 21bacc2154
parent 68c011aa08
4 changed files with 136 additions and 29 deletions
--- a/clang/utils/analyzer/Dockerfile
+++ b/clang/utils/analyzer/Dockerfile
@ -54,8 +54,7 @@ ENV PATH="/analyzer/bin:${PATH}"
 ADD entrypoint.py /entrypoint.py
-# Uncomment in case of requirements
+ADD requirements.txt /requirements.txt
-# ADD requirements.txt /requirements.txt
+RUN pip3 install -r /requirements.txt
 # RUN pip3 install -r /requirements.txt
 ENTRYPOINT ["python", "/entrypoint.py"]
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@ -43,7 +43,7 @@ For testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
 variable. It should contain a comma separated list.
 """
 import CmpRuns
-import SATestUtils
+import SATestUtils as utils
 from ProjectMap import DownloadType, ProjectInfo
 import glob
@ -63,7 +63,7 @@ from queue import Queue
 # and this is we can shush that false positive
 from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
 ###############################################################################
@ -115,7 +115,7 @@ logging.basicConfig(
 if 'CC' in os.environ:
    cc_candidate: Optional[str] = os.environ['CC']
 else:
-    cc_candidate = SATestUtils.which("clang", os.environ['PATH'])
+    cc_candidate = utils.which("clang", os.environ['PATH'])
 if not cc_candidate:
    stderr("Error: cannot find 'clang' in PATH")
    sys.exit(1)
@ -194,9 +194,9 @@ def run_cleanup_script(directory: str, build_log_file: IO):
    cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
    script_path = os.path.join(directory, CLEANUP_SCRIPT)
-    SATestUtils.run_script(script_path, build_log_file, cwd,
+    utils.run_script(script_path, build_log_file, cwd,
-                           out=LOCAL.stdout, err=LOCAL.stderr,
+                     out=LOCAL.stdout, err=LOCAL.stderr,
-                           verbose=VERBOSE)
+                     verbose=VERBOSE)
 class TestInfo(NamedTuple):
@ -351,8 +351,6 @@ class ProjectTester:
            return OUTPUT_DIR_NAME
    def build(self, directory: str, output_dir: str):
        time_start = time.time()
        build_log_path = get_build_log_path(output_dir)
        stdout(f"Log file: {build_log_path}\n")
@ -375,19 +373,23 @@ class ProjectTester:
            if self.project.mode == 1:
                self._download_and_patch(directory, build_log_file)
                run_cleanup_script(directory, build_log_file)
-                self.scan_build(directory, output_dir, build_log_file)
+                build_time, memory = self.scan_build(directory, output_dir,
                                               build_log_file)
            else:
-                self.analyze_preprocessed(directory, output_dir)
+                build_time, memory = self.analyze_preprocessed(directory,
                                                               output_dir)
            if self.is_reference_build:
                run_cleanup_script(directory, build_log_file)
                normalize_reference_results(directory, output_dir,
                                            self.project.mode)
-        stdout(f"Build complete (time: {time.time() - time_start:.2f}). "
+        stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
               f"peak memory: {utils.memory_to_str(memory)}). "
               f"See the log for more details: {build_log_path}\n")
-    def scan_build(self, directory: str, output_dir: str, build_log_file: IO):
+    def scan_build(self, directory: str, output_dir: str,
                   build_log_file: IO) -> Tuple[float, int]:
        """
        Build the project with scan-build by reading in the commands and
        prefixing them with the scan-build options.
@ -416,6 +418,10 @@ class ProjectTester:
            options += "--override-compiler "
        extra_env: Dict[str, str] = {}
        execution_time = 0.0
        peak_memory = 0
        try:
            command_file = open(build_script_path, "r")
            command_prefix = "scan-build " + options + " "
@ -451,11 +457,15 @@ class ProjectTester:
                if VERBOSE >= 1:
                    stdout(f"  Executing: {command_to_run}\n")
-                check_call(command_to_run, cwd=cwd,
+                time, mem = utils.check_and_measure_call(
-                           stderr=build_log_file,
+                    command_to_run, cwd=cwd,
-                           stdout=build_log_file,
+                    stderr=build_log_file,
-                           env=dict(os.environ, **extra_env),
+                    stdout=build_log_file,
-                           shell=True)
+                    env=dict(os.environ, **extra_env),
                    shell=True)
                execution_time += time
                peak_memory = max(peak_memory, mem)
        except CalledProcessError:
            stderr("Error: scan-build failed. Its output was: \n")
@ -463,7 +473,10 @@ class ProjectTester:
            shutil.copyfileobj(build_log_file, LOCAL.stderr)
            sys.exit(1)
-    def analyze_preprocessed(self, directory: str, output_dir: str):
+        return execution_time, peak_memory
    def analyze_preprocessed(self, directory: str,
                             output_dir: str) -> Tuple[float, int]:
        """
        Run analysis on a set of preprocessed files.
        """
@ -487,14 +500,17 @@ class ProjectTester:
        fail_path = os.path.join(plist_path, "failures")
        os.makedirs(fail_path)
        execution_time = 0.0
        peak_memory = 0
        for full_file_name in glob.glob(directory + "/*"):
            file_name = os.path.basename(full_file_name)
            failed = False
            # Only run the analyzes on supported files.
-            if SATestUtils.has_no_extension(file_name):
+            if utils.has_no_extension(file_name):
                continue
-            if not SATestUtils.is_valid_single_input_file(file_name):
+            if not utils.is_valid_single_input_file(file_name):
                stderr(f"Error: Invalid single input file {full_file_name}.\n")
                raise Exception()
@ -509,8 +525,12 @@ class ProjectTester:
                    if VERBOSE >= 1:
                        stdout(f"  Executing: {command}\n")
-                    check_call(command, cwd=directory, stderr=log_file,
+                    time, mem = utils.check_and_measure_call(
-                               stdout=log_file, shell=True)
+                        command, cwd=directory, stderr=log_file,
                        stdout=log_file, shell=True)
                    execution_time += time
                    peak_memory = max(peak_memory, mem)
                except CalledProcessError as e:
                    stderr(f"Error: Analyzes of {full_file_name} failed. "
@ -522,6 +542,8 @@ class ProjectTester:
                if not failed:
                    os.remove(log_file.name)
        return execution_time, peak_memory
    def generate_config(self) -> str:
        out = "serialize-stats=true,stable-report-filename=true"
@ -598,9 +620,9 @@ class ProjectTester:
    @staticmethod
    def _run_download_script(directory: str, build_log_file: IO):
        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
-        SATestUtils.run_script(script_path, build_log_file, directory,
+        utils.run_script(script_path, build_log_file, directory,
-                               out=LOCAL.stdout, err=LOCAL.stderr,
+                         out=LOCAL.stdout, err=LOCAL.stderr,
-                               verbose=VERBOSE)
+                         verbose=VERBOSE)
    @staticmethod
    def _apply_patch(directory: str, build_log_file: IO):
--- a/clang/utils/analyzer/SATestUtils.py
+++ b/clang/utils/analyzer/SATestUtils.py
@ -1,8 +1,9 @@
 import os
 import sys
 import time
 from subprocess import CalledProcessError, check_call
-from typing import List, IO, Optional
+from typing import List, IO, Optional, Tuple
 def which(command: str, paths: Optional[str] = None) -> Optional[str]:
@ -47,6 +48,87 @@ def is_valid_single_input_file(file_name: str) -> bool:
    return ext in (".i", ".ii", ".c", ".cpp", ".m", "")
 def time_to_str(time: float) -> str:
    """
    Render a duration as a human-readable string.

    The value is taken to be in seconds and is printed with two digits
    after the decimal point, followed by an "s" suffix (e.g. "1.50s").
    """
    seconds = format(time, ".2f")
    return seconds + "s"
 def memory_to_str(memory: int) -> str:
    """
    Render an amount of memory, given in bytes, as a human-readable string.

    A zero amount is reported as "N/A".  Non-zero amounts are formatted by
    the optional humanize package (GNU-style sizes) when it can be
    imported, and printed as a plain byte count with a "B" suffix
    otherwise.
    """
    if not memory:
        # If memory is 0, we didn't succeed measuring it.
        return "N/A"

    try:
        import humanize
    except ImportError:
        # no formatter installed, let's keep it in bytes
        return f"{memory}B"

    return humanize.naturalsize(memory, gnu=True)
 def check_and_measure_call(*popenargs, **kwargs) -> Tuple[float, int]:
    """
    Run command with arguments, wait for it to complete, and measure its
    execution time and peak memory consumption.

    The arguments are the same as for the subprocess call and check_call
    functions.

    While the command runs, memory is sampled twice a second as the summed
    resident set size of the command and all of its descendants, so the
    reported peak is an approximation.  When psutil is not installed, the
    command is simply run through check_call and the peak is reported as 0.

    Return a tuple of execution time (in seconds) and peak memory.

    Raise CalledProcessError, carrying the exit status in its returncode
    attribute, if the command exits with a non-zero code.
    """
    # A monotonic clock keeps the measured duration immune to wall-clock
    # adjustments happening while the command runs.
    start_time = time.monotonic()

    # Keep the try body down to the import itself, so that an ImportError
    # escaping from anywhere else can never trigger a second run of the
    # command through the fallback path.
    try:
        import psutil as ps
    except ImportError:
        # back off to subprocess if we don't have psutil installed
        check_call(*popenargs, **kwargs)
        return time.monotonic() - start_time, 0

    def get_memory(process: ps.Process) -> int:
        # we want to gather memory usage from all of the child processes
        try:
            family = list(process.children(recursive=True))
        except (ps.NoSuchProcess, ps.AccessDenied):
            # the process tree can't be inspected (any more); fall back to
            # sampling the root process alone
            family = []
        family.append(process)

        mem = 0
        for proc in family:
            try:
                mem += proc.memory_info().rss
            except (ps.NoSuchProcess, ps.AccessDenied):
                # the process went away (or is off limits) mid-sample
                continue
        return mem

    peak_mem = 0
    with ps.Popen(*popenargs, **kwargs) as process:
        # while the process is running calculate resource utilization.
        while (process.is_running() and
               process.status() != ps.STATUS_ZOMBIE):
            # track the peak utilization of the process
            peak_mem = max(peak_mem, get_memory(process))
            time.sleep(.5)

        # The loop also ends on a zombie, which is still reported as
        # running; make sure nothing is left behind before the context
        # manager waits for the process.
        if process.is_running():
            process.kill()

    if process.returncode != 0:
        # mirror check_call: report the command that was actually run
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        raise CalledProcessError(process.returncode, cmd)

    return time.monotonic() - start_time, peak_mem
 def run_script(script_path: str, build_log_file: IO, cwd: str,
               out=sys.stdout, err=sys.stderr, verbose: int = 0):
    """
--- a/clang/utils/analyzer/requirements.txt
+++ b/clang/utils/analyzer/requirements.txt
@ -0,0 +1,4 @@
 graphviz
 humanize
 matplotlib
 psutil