[libc++] Add a script to produce benchmarks for LNT (#175594)

This patch adds a script to run a subset of libc++'s benchmarks and produce a report for uploading to LNT. As part of this patch, the test-at-commit script is modified to no longer build the library itself. This provides the flexibility needed to run the test suite multiple times against the same built library, and it also addresses previous concerns that test-at-commit could not customize how the library is built.
2026-01-20 09:57:37 -05:00 · 2026-01-20 09:57:37 -05:00 · 7ce6a94c61
commit 7ce6a94c61
parent b3212adae0
4 changed files with 228 additions and 45 deletions
--- a/libcxx/utils/benchmark-historical
+++ b/libcxx/utils/benchmark-historical
@@ -76,30 +76,37 @@ def main(argv):
            logging.info(f'Skipping {commit} which already has data in {output_file}')
            continue
        else:
-            logging.info(f'Benchmarking {commit}')
+            logging.info(f'Benchmarking {commit} against test-suite in {args.git_repo}')

-        with tempfile.TemporaryDirectory() as build_dir:
-            test_cmd = [PARENT_DIR / 'test-at-commit', '--git-repo', args.git_repo,
-                                                       '--build', build_dir,
-                                                       '--commit', commit]
-            test_cmd += ['--'] + lit_options
+        with tempfile.TemporaryDirectory() as libcxx_install_dir:
+            with tempfile.TemporaryDirectory() as build_dir:
+                build_cmd = [PARENT_DIR / 'build-at-commit', '--git-repo', args.git_repo,
+                                                             '--commit', commit,
+                                                             '--install-dir', libcxx_install_dir,
+                                                             '--', '-DCMAKE_BUILD_TYPE=RelWithDebInfo']

-            if args.dry_run:
-                pretty = ' '.join(str(a) for a in test_cmd)
-                logging.info(f'Running {pretty}')
-                continue
+                test_cmd = [PARENT_DIR / 'test-at-commit', '--git-repo', args.git_repo,
+                                                           '--libcxx-installation', libcxx_install_dir,
+                                                           '--build-dir', build_dir]
+                test_cmd += ['--'] + lit_options

-            subprocess.call(test_cmd)
-            output_file.parent.mkdir(parents=True, exist_ok=True)
-            mode = 'a' if args.existing == 'append' else 'w'
-            if output_file.exists() and args.existing == 'append':
-                logging.info(f'Appending to existing data for {commit}')
-            elif output_file.exists() and args.existing == 'overwrite':
-                logging.info(f'Overwriting existing data for {commit}')
-            else:
-                logging.info(f'Writing data for {commit}')
-            with open(output_file, mode) as out:
-                subprocess.check_call([(PARENT_DIR / 'consolidate-benchmarks'), build_dir], stdout=out)
+                if args.dry_run:
+                    logging.info(f'Running {" ".join(str(a) for a in build_cmd)}')
+                    logging.info(f'Running {" ".join(str(a) for a in test_cmd)}')
+                    continue
+
+                subprocess.check_call(build_cmd)
+                subprocess.call(test_cmd)
+                output_file.parent.mkdir(parents=True, exist_ok=True)
+                mode = 'a' if args.existing == 'append' else 'w'
+                if output_file.exists() and args.existing == 'append':
+                    logging.info(f'Appending to existing data for {commit}')
+                elif output_file.exists() and args.existing == 'overwrite':
+                    logging.info(f'Overwriting existing data for {commit}')
+                else:
+                    logging.info(f'Writing data for {commit}')
+                with open(output_file, mode) as out:
+                    subprocess.check_call([(PARENT_DIR / 'consolidate-benchmarks'), build_dir], stdout=out)

 if __name__ == '__main__':
    main(sys.argv[1:])
--- a/libcxx/utils/build-at-commit
+++ b/libcxx/utils/build-at-commit
@@ -98,7 +98,7 @@ def main(argv):

    # Gather CMake options
    cmake_options = []
-    if args.cmake_options is not None:
+    if args.cmake_options:
        if args.cmake_options[0] != '--':
            raise ArgumentError('For clarity, CMake options must be separated from other options by --')
        cmake_options = args.cmake_options[1:]
--- a/libcxx/utils/ci/benchmark-for-lnt.py
+++ b/libcxx/utils/ci/benchmark-for-lnt.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+
+import argparse
+import os
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+def step(message: str) -> None:
+    print(message, file=sys.stderr)
+
+def directory_path(string):
+    if os.path.isdir(string):
+        return pathlib.Path(string)
+    else:
+        raise NotADirectoryError(string)
+
+def main(argv):
+    parser = argparse.ArgumentParser(
+        prog='benchmark-for-lnt',
+        description='Benchmark libc++ at the given commit for submitting to LNT.')
+    parser.add_argument('-o', '--output', type=argparse.FileType('w'), default='-',
+        help='Path to the file where the resulting LNT report containing benchmark results is written. '
+             'By default, stdout.')
+    parser.add_argument('--benchmark-commit', type=str, required=True,
+        help='The SHA representing the version of the library to benchmark.')
+    parser.add_argument('--test-suite-commit', type=str, required=True,
+        help='The SHA representing the version of the test suite to use for benchmarking.')
+    parser.add_argument('--machine', type=str, required=True,
+        help='The name of the machine for reporting LNT results.')
+    parser.add_argument('--spec-dir', type=pathlib.Path, required=False,
+        help='Optional path to a SPEC installation to use for benchmarking.')
+    parser.add_argument('--git-repo', type=directory_path, default=os.getcwd(),
+        help='Optional path to the Git repository to use. By default, the current working directory is used.')
+    parser.add_argument('--dry-run', action='store_true',
+        help='Only print what would be executed.')
+    parser.add_argument('-v', '--verbose', action='store_true',
+        help='Print the output of all subcommands.')
+    args = parser.parse_args(argv)
+
+    def run(command, *posargs, **kwargs):
+        command = [str(c) for c in command]
+        if args.dry_run:
+            print(f'$ {" ".join(command)}')
+        else:
+            # In verbose mode, print everything but send subcommand stdout to stderr, since our own
+            # stdout may be carrying the JSON report. Otherwise, silence all subcommand output.
+            if args.verbose:
+                if 'stdout' not in kwargs:
+                    kwargs.update({'stdout': sys.stderr})
+            else:
+                if 'stdout' not in kwargs:
+                    kwargs.update({'stdout': subprocess.DEVNULL})
+                if 'stderr' not in kwargs:
+                    kwargs.update({'stderr': subprocess.DEVNULL})
+            subprocess.check_call(command, *posargs, **kwargs)
+
+    with tempfile.TemporaryDirectory() as build_dir:
+        build_dir = pathlib.Path(build_dir)
+
+        step(f'Building libc++ at commit {args.benchmark_commit}')
+        run([args.git_repo / 'libcxx/utils/build-at-commit',
+                        '--git-repo', args.git_repo,
+                        '--install-dir', build_dir / 'install',
+                        '--commit', args.benchmark_commit,
+                        '--', '-DCMAKE_BUILD_TYPE=RelWithDebInfo'])
+
+        if args.spec_dir is not None:
+            step(f'Running SPEC benchmarks from {args.test_suite_commit} against libc++ {args.benchmark_commit}')
+            run([args.git_repo / 'libcxx/utils/test-at-commit',
+                        '--git-repo', args.git_repo,
+                        '--build-dir', build_dir / 'spec',
+                        '--test-suite-commit', args.test_suite_commit,
+                        '--libcxx-installation', build_dir / 'install',
+                        '--',
+                        '-j1', '--time-tests',
+                        '--param', 'optimization=speed',
+                        '--param', 'std=c++17',
+                        '--param', f'spec_dir={args.spec_dir}',
+                        build_dir / 'spec/libcxx/test',
+                        '--filter', 'benchmarks/spec.gen.py'])
+
+        # TODO: For now, we run only a subset of the benchmarks because running the whole test suite is too slow.
+        #       Run the whole test suite once https://github.com/llvm/llvm-project/issues/173032 is resolved.
+        step(f'Running microbenchmarks from {args.test_suite_commit} against libc++ {args.benchmark_commit}')
+        run([args.git_repo / 'libcxx/utils/test-at-commit',
+                        '--git-repo', args.git_repo,
+                        '--build-dir', build_dir / 'micro',
+                        '--test-suite-commit', args.test_suite_commit,
+                        '--libcxx-installation', build_dir / 'install',
+                        '--',
+                        '-j1', '--time-tests',
+                        '--param', 'optimization=speed',
+                        '--param', 'std=c++26',
+                        build_dir / 'micro/libcxx/test',
+                        '--filter', 'benchmarks/(algorithms|containers|iterators|locale|memory|streams|numeric|utility)'])
+
+        step('Installing LNT')
+        run(['python', '-m', 'venv', build_dir / '.venv'])
+        run([build_dir / '.venv/bin/pip', 'install', 'llvm-lnt'])
+
+        step('Consolidating benchmark results and creating JSON report')
+        if args.spec_dir is not None:
+            with open(build_dir / 'benchmarks.lnt', 'w') as f:
+                run([args.git_repo / 'libcxx/utils/consolidate-benchmarks', build_dir / 'spec'], stdout=f)
+        with open(build_dir / 'benchmarks.lnt', 'a') as f:
+            run([args.git_repo / 'libcxx/utils/consolidate-benchmarks', build_dir / 'micro'], stdout=f)
+        order = len(subprocess.check_output(['git', '-C', args.git_repo, 'rev-list', args.benchmark_commit]).splitlines())
+        commit_info = subprocess.check_output(['git', '-C', args.git_repo, 'show', args.benchmark_commit, '--no-patch']).decode()
+        run([build_dir / '.venv/bin/lnt', 'importreport', '--order', str(order), '--machine', args.machine,
+                '--run-info', f'commit_info={commit_info}',
+                build_dir / 'benchmarks.lnt', build_dir / 'benchmarks.json'])
+
+        if not args.dry_run:
+            with open(build_dir / 'benchmarks.json', 'r') as f:
+                args.output.write(f.read())
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
--- a/libcxx/utils/test-at-commit
+++ b/libcxx/utils/test-at-commit
@@ -35,6 +35,41 @@ libcxx.test.config.configure(
 )
 """

+# Unofficial list of directories required to build libc++. This is a best guess that should work
+# when checking out the monorepo at most commits, but it's technically not guaranteed to work
+# (especially for much older commits).
+LIBCXX_REQUIRED_DIRECTORIES = [
+    'libcxx',
+    'libcxxabi',
+    'llvm/cmake',
+    'llvm/utils/llvm-lit',
+    'llvm/utils/lit',
+    'runtimes',
+    'cmake',
+    'third-party/benchmark',
+    'libc'
+]
+
+def checkout_subdirectories(git_repo, commit, paths, destination):
+    """
+    Produce a copy of the specified Git-tracked files/directories at the given commit.
+    The resulting files and directories are placed at the given location.
+    """
+    with tempfile.TemporaryDirectory() as tmp:
+        tmpfile = os.path.join(tmp, 'archive.tar.gz')
+        git_archive = ['git', '-C', git_repo, 'archive', '--format', 'tar.gz', '--output', tmpfile, commit, '--'] + list(paths)
+        subprocess.check_call(git_archive)
+        os.makedirs(destination, exist_ok=True)
+        subprocess.check_call(['tar', '-x', '-z', '-f', tmpfile, '-C', destination])
+
+def exists_in_commit(git_repo, commit, path):
+    """
+    Return whether the given path (file or directory) existed at the given commit.
+    """
+    cmd = ['git', '-C', git_repo, 'show', f'{commit}:{path}']
+    result = subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    return result == 0
+
 def directory_path(string):
    if os.path.isdir(string):
        return pathlib.Path(string)
@@ -44,16 +79,18 @@ def directory_path(string):
 def main(argv):
    parser = argparse.ArgumentParser(
        prog='test-at-commit',
-        description='Build libc++ at the specified commit and test it against the version of the test suite '
-                    'currently checked out in the specified Git repository. '
-                    'This makes it easier to perform historical analyses of libc++ behavior, gather historical '
-                    'performance data, bisect issues, and so on. '
-                    'A current limitation of this script is that it assumes the arguments passed to CMake when '
-                    'building the library.')
-    parser.add_argument('--build', '-B', type=pathlib.Path, required=True,
-        help='Path to create the build directory for running the test suite at.')
-    parser.add_argument('--commit', type=str, required=True,
-        help='Commit to build libc++ at.')
+        description='Test the provided libc++ installation against the test suite at the specified commit (or '
+                    'the currently checked-out sources by default). This makes it easier to perform historical '
+                    'analyses of libc++ behavior, gather historical performance data, bisect issues, and so on.')
+    parser.add_argument('--build-dir', '-B', type=pathlib.Path, required=True,
+        help='Path to create the build directory for running the test suite at. The results of the tests '
+             'are located in that directory after the run.')
+    parser.add_argument('--libcxx-installation', type=pathlib.Path, required=True,
+        help='Path to the directory where a copy of libc++ to run tests on is installed.')
+    parser.add_argument('--test-suite-commit', type=str, required=False,
+        help='Commit to use for the test suite. If left unspecified, the currently checked-out version of the '
+             'test suite is used. Otherwise, the requested version is checked out in a separate directory and '
+             'that version of the test suite is used.')
    parser.add_argument('lit_options', nargs=argparse.REMAINDER,
        help='Optional arguments passed to lit when running the tests. Should be provided last and '
             'separated from other arguments with a `--`.')
@@ -61,6 +98,9 @@ def main(argv):
        help='Optional path to the Git repository to use. By default, the current working directory is used.')
    args = parser.parse_args(argv)

+    args.build_dir = args.build_dir.resolve()
+    args.libcxx_installation = args.libcxx_installation.resolve()
+
    # Gather lit options
    lit_options = []
    if args.lit_options is not None:
@@ -68,29 +108,37 @@ def main(argv):
            raise ArgumentError('For clarity, Lit options must be separated from other options by --')
        lit_options = args.lit_options[1:]

-    with tempfile.TemporaryDirectory() as install_dir:
-        # Build the library at the baseline
-        build_cmd = [PARENT_DIR / 'build-at-commit', '--git-repo', args.git_repo,
-                                                     '--install-dir', install_dir,
-                                                     '--commit', args.commit]
-        build_cmd += ['--', '-DCMAKE_BUILD_TYPE=RelWithDebInfo']
-        subprocess.check_call(build_cmd)
+    # This is the list of directories that must be cleaned up before we return
+    tempdirs = []
+    try:
+        # If needed, check out the test suite at the commit we're going to use for the suite
+        if args.test_suite_commit is None:
+            test_suite_sources = args.git_repo
+        else:
+            tempdirs.append(tempfile.TemporaryDirectory())
+            test_suite_sources = pathlib.Path(tempdirs[-1].name)
+            checkout_dirs = [d for d in LIBCXX_REQUIRED_DIRECTORIES if exists_in_commit(args.git_repo, args.test_suite_commit, d)]
+            checkout_subdirectories(args.git_repo, args.test_suite_commit, checkout_dirs, test_suite_sources)

        # Configure the test suite in the specified build directory
-        args.build.mkdir(parents=True, exist_ok=True)
-        lit_cfg = (args.build / 'temp_lit_cfg.cfg.in').absolute()
+        args.build_dir.mkdir(parents=True, exist_ok=True)
+        lit_cfg = (args.build_dir / 'temp_lit_cfg.cfg.in').absolute()
        with open(lit_cfg, 'w') as f:
-            f.write(LIT_CONFIG_FILE.format(INSTALL_ROOT=install_dir))
+            f.write(LIT_CONFIG_FILE.format(INSTALL_ROOT=args.libcxx_installation))

-        test_suite_cmd = ['cmake', '-B', args.build, '-S', args.git_repo / 'runtimes', '-G', 'Ninja']
+        test_suite_cmd = ['cmake', '-B', args.build_dir, '-S', test_suite_sources / 'runtimes', '-G', 'Ninja']
        test_suite_cmd += ['-D', 'LLVM_ENABLE_RUNTIMES=libcxx;libcxxabi']
        test_suite_cmd += ['-D', 'LIBCXXABI_USE_LLVM_UNWINDER=OFF']
        test_suite_cmd += ['-D', f'LIBCXX_TEST_CONFIG={lit_cfg}']
        subprocess.check_call(test_suite_cmd)

-        # Run the specified tests against the produced baseline installation
-        lit_cmd = [PARENT_DIR / 'libcxx-lit', args.build] + lit_options
+        # Run the specified tests against the built library
+        lit_cmd = [PARENT_DIR / 'libcxx-lit', args.build_dir] + lit_options
        subprocess.check_call(lit_cmd)
+    finally:
+        for d in tempdirs:
+            d.cleanup()
+

 if __name__ == '__main__':
    main(sys.argv[1:])