[analyzer][tests] Measure peak memory consumption for every project

Differential Revision: https://reviews.llvm.org/D82967
2020-07-10 10:54:18 +03:00 · 2020-07-10 10:54:18 +03:00 · 21bacc2154
commit 21bacc2154
parent 68c011aa08
4 changed files with 136 additions and 29 deletions
--- a/clang/utils/analyzer/Dockerfile
+++ b/clang/utils/analyzer/Dockerfile
@ -54,8 +54,7 @@ ENV PATH="/analyzer/bin:${PATH}"

 ADD entrypoint.py /entrypoint.py

-# Uncomment in case of requirements
-# ADD requirements.txt /requirements.txt
-# RUN pip3 install -r /requirements.txt
+ADD requirements.txt /requirements.txt
+RUN pip3 install -r /requirements.txt

 ENTRYPOINT ["python", "/entrypoint.py"]
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@ -43,7 +43,7 @@ For testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
 variable. It should contain a comma separated list.
 """
 import CmpRuns
-import SATestUtils
+import SATestUtils as utils
 from ProjectMap import DownloadType, ProjectInfo

 import glob
@ -63,7 +63,7 @@ from queue import Queue
 # and this is we can shush that false positive
 from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple


 ###############################################################################
@ -115,7 +115,7 @@ logging.basicConfig(
 if 'CC' in os.environ:
    cc_candidate: Optional[str] = os.environ['CC']
 else:
-    cc_candidate = SATestUtils.which("clang", os.environ['PATH'])
+    cc_candidate = utils.which("clang", os.environ['PATH'])
 if not cc_candidate:
    stderr("Error: cannot find 'clang' in PATH")
    sys.exit(1)
@ -194,9 +194,9 @@ def run_cleanup_script(directory: str, build_log_file: IO):
    cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
    script_path = os.path.join(directory, CLEANUP_SCRIPT)

-    SATestUtils.run_script(script_path, build_log_file, cwd,
-                           out=LOCAL.stdout, err=LOCAL.stderr,
-                           verbose=VERBOSE)
+    utils.run_script(script_path, build_log_file, cwd,
+                     out=LOCAL.stdout, err=LOCAL.stderr,
+                     verbose=VERBOSE)


 class TestInfo(NamedTuple):
@ -351,8 +351,6 @@ class ProjectTester:
            return OUTPUT_DIR_NAME

    def build(self, directory: str, output_dir: str):
-        time_start = time.time()
-
        build_log_path = get_build_log_path(output_dir)

        stdout(f"Log file: {build_log_path}\n")
@ -375,19 +373,23 @@ class ProjectTester:
            if self.project.mode == 1:
                self._download_and_patch(directory, build_log_file)
                run_cleanup_script(directory, build_log_file)
-                self.scan_build(directory, output_dir, build_log_file)
+                build_time, memory = self.scan_build(directory, output_dir,
+                                               build_log_file)
            else:
-                self.analyze_preprocessed(directory, output_dir)
+                build_time, memory = self.analyze_preprocessed(directory,
+                                                               output_dir)

            if self.is_reference_build:
                run_cleanup_script(directory, build_log_file)
                normalize_reference_results(directory, output_dir,
                                            self.project.mode)

-        stdout(f"Build complete (time: {time.time() - time_start:.2f}). "
+        stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
+               f"peak memory: {utils.memory_to_str(memory)}). "
               f"See the log for more details: {build_log_path}\n")

-    def scan_build(self, directory: str, output_dir: str, build_log_file: IO):
+    def scan_build(self, directory: str, output_dir: str,
+                   build_log_file: IO) -> Tuple[float, int]:
        """
        Build the project with scan-build by reading in the commands and
        prefixing them with the scan-build options.
@ -416,6 +418,10 @@ class ProjectTester:
            options += "--override-compiler "

        extra_env: Dict[str, str] = {}
+
+        execution_time = 0.0
+        peak_memory = 0
+
        try:
            command_file = open(build_script_path, "r")
            command_prefix = "scan-build " + options + " "
@ -451,11 +457,15 @@ class ProjectTester:
                if VERBOSE >= 1:
                    stdout(f"  Executing: {command_to_run}\n")

-                check_call(command_to_run, cwd=cwd,
-                           stderr=build_log_file,
-                           stdout=build_log_file,
-                           env=dict(os.environ, **extra_env),
-                           shell=True)
+                time, mem = utils.check_and_measure_call(
+                    command_to_run, cwd=cwd,
+                    stderr=build_log_file,
+                    stdout=build_log_file,
+                    env=dict(os.environ, **extra_env),
+                    shell=True)
+
+                execution_time += time
+                peak_memory = max(peak_memory, mem)

        except CalledProcessError:
            stderr("Error: scan-build failed. Its output was: \n")
@ -463,7 +473,10 @@ class ProjectTester:
            shutil.copyfileobj(build_log_file, LOCAL.stderr)
            sys.exit(1)

-    def analyze_preprocessed(self, directory: str, output_dir: str):
+        return execution_time, peak_memory
+
+    def analyze_preprocessed(self, directory: str,
+                             output_dir: str) -> Tuple[float, int]:
        """
        Run analysis on a set of preprocessed files.
        """
@ -487,14 +500,17 @@ class ProjectTester:
        fail_path = os.path.join(plist_path, "failures")
        os.makedirs(fail_path)

+        execution_time = 0.0
+        peak_memory = 0
+
        for full_file_name in glob.glob(directory + "/*"):
            file_name = os.path.basename(full_file_name)
            failed = False

            # Only run the analyzes on supported files.
-            if SATestUtils.has_no_extension(file_name):
+            if utils.has_no_extension(file_name):
                continue
-            if not SATestUtils.is_valid_single_input_file(file_name):
+            if not utils.is_valid_single_input_file(file_name):
                stderr(f"Error: Invalid single input file {full_file_name}.\n")
                raise Exception()

@ -509,8 +525,12 @@ class ProjectTester:
                    if VERBOSE >= 1:
                        stdout(f"  Executing: {command}\n")

-                    check_call(command, cwd=directory, stderr=log_file,
-                               stdout=log_file, shell=True)
+                    time, mem = utils.check_and_measure_call(
+                        command, cwd=directory, stderr=log_file,
+                        stdout=log_file, shell=True)
+
+                    execution_time += time
+                    peak_memory = max(peak_memory, mem)

                except CalledProcessError as e:
                    stderr(f"Error: Analyzes of {full_file_name} failed. "
@ -522,6 +542,8 @@ class ProjectTester:
                if not failed:
                    os.remove(log_file.name)

+        return execution_time, peak_memory
+
    def generate_config(self) -> str:
        out = "serialize-stats=true,stable-report-filename=true"

@ -598,9 +620,9 @@ class ProjectTester:
    @staticmethod
    def _run_download_script(directory: str, build_log_file: IO):
        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
-        SATestUtils.run_script(script_path, build_log_file, directory,
-                               out=LOCAL.stdout, err=LOCAL.stderr,
-                               verbose=VERBOSE)
+        utils.run_script(script_path, build_log_file, directory,
+                         out=LOCAL.stdout, err=LOCAL.stderr,
+                         verbose=VERBOSE)

    @staticmethod
    def _apply_patch(directory: str, build_log_file: IO):
--- a/clang/utils/analyzer/SATestUtils.py
+++ b/clang/utils/analyzer/SATestUtils.py
@ -1,8 +1,9 @@
 import os
 import sys
+import time

 from subprocess import CalledProcessError, check_call
-from typing import List, IO, Optional
+from typing import List, IO, Optional, Tuple


 def which(command: str, paths: Optional[str] = None) -> Optional[str]:
@ -47,6 +48,87 @@ def is_valid_single_input_file(file_name: str) -> bool:
    return ext in (".i", ".ii", ".c", ".cpp", ".m", "")


+def time_to_str(time: float) -> str:
+    """
+    Render a duration given in seconds with two decimal places, e.g. 1.50s.
+    """
+    return format(time, ".2f") + "s"
+
+
+def memory_to_str(memory: int) -> str:
+    """
+    Render a byte count as a short human-readable string; a count of zero
+    (meaning the measurement did not succeed) is rendered as "N/A".
+    """
+    if not memory:
+        return "N/A"
+
+    try:
+        import humanize
+        return humanize.naturalsize(memory, gnu=True)
+    except ImportError:
+        # humanize is not installed, report the raw number of bytes
+        return str(memory) + "B"
+
+
+def check_and_measure_call(*popenargs, **kwargs) -> Tuple[float, int]:
+    """
+    Run command with arguments.  Wait for command to complete and measure
+    execution time and peak memory consumption.
+    If the exit code was zero then return, otherwise raise
+    CalledProcessError.  The CalledProcessError object will have the
+    return code in the returncode attribute.
+
+    The arguments are the same as for the call and check_call functions.
+
+    Return a tuple of execution time (seconds) and peak memory (bytes of
+    RSS summed over the process tree, or 0 when psutil is not installed).
+    """
+    start_time = time.time()
+
+    try:
+        import psutil as ps
+    except ImportError:
+        # fall back to subprocess if we don't have psutil installed
+        check_call(*popenargs, **kwargs)
+        return time.time() - start_time, 0
+
+    from subprocess import TimeoutExpired
+
+    def get_memory(process: ps.Process) -> int:
+        mem = 0
+        # we want to gather memory usage from all of the child processes
+        for member in process.children(recursive=True) + [process]:
+            try:
+                mem += member.memory_info().rss
+            except (ps.NoSuchProcess, ps.AccessDenied):
+                continue
+        return mem
+
+    peak_mem = 0
+    with ps.Popen(*popenargs, **kwargs) as process:
+        while True:
+            # track the peak utilization of the process
+            peak_mem = max(peak_mem, get_memory(process))
+            try:
+                # unlike a fixed sleep, wait() returns as soon as the command
+                # exits, so the measured time is not padded by the poll period
+                process.wait(timeout=.5)
+                break
+            except (ps.TimeoutExpired, TimeoutExpired):
+                continue
+
+    elapsed = time.time() - start_time
+
+    if process.returncode != 0:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        raise CalledProcessError(process.returncode, cmd)
+
+    return elapsed, peak_mem
+
+
 def run_script(script_path: str, build_log_file: IO, cwd: str,
               out=sys.stdout, err=sys.stderr, verbose: int = 0):
    """
--- a/clang/utils/analyzer/requirements.txt
+++ b/clang/utils/analyzer/requirements.txt
@ -0,0 +1,4 @@
+graphviz
+humanize
+matplotlib
+psutil