[libc++] Measure additional metrics when running SPEC benchmarks (#177669)

This patch adds support for measuring additional metrics (maximum RSS, retired instructions, elapsed cycles and peak memory footprint) when running the SPEC benchmarks. Fixes #177611
2026-01-26 14:37:53 -05:00 · 2026-01-26 14:37:53 -05:00 · f9069a5f30
commit f9069a5f30
parent ac454a2706
2 changed files with 55 additions and 4 deletions
--- a/libcxx/test/benchmarks/spec.gen.py
+++ b/libcxx/test/benchmarks/spec.gen.py
@ -66,11 +66,19 @@ spec_benchmarks &= no_fortran

 for benchmark in spec_benchmarks:
    print(f'#--- {benchmark}.sh.test')
-    print(f'RUN: rm -rf %{{temp}}') # clean up any previous (potentially incomplete) run
+    # Clean up any previous (potentially incomplete) run
+    print(f'RUN: rm -rf %{{temp}}')
+
+    # Build the benchmark
    print(f'RUN: mkdir %{{temp}}')
    print(f'RUN: cp {spec_config} %{{temp}}/spec-config.cfg')
-    print(f'RUN: %{{spec_dir}}/bin/runcpu --config %{{temp}}/spec-config.cfg --size train --output-root %{{temp}} --rebuild {benchmark}')
-    print(f'RUN: rm -rf %{{temp}}/benchspec') # remove the temporary directory, which can become quite large
+    print(f'RUN: %{{spec_dir}}/bin/runcpu --config %{{temp}}/spec-config.cfg --action build --output_root %{{temp}} {benchmark}')
+
+    # Run the benchmark
+    print(f'RUN: /usr/bin/time -l -o %{{temp}}/time.txt %{{spec_dir}}/bin/runcpu --config %{{temp}}/spec-config.cfg --action run --size train --output_root %{{temp}} {benchmark}')
+
+    # Clean up, since there can be lots of content created
+    print(f'RUN: rm -rf %{{temp}}/benchspec')

    # The `runcpu` command above doesn't fail even if the benchmark fails to run. To determine failure, parse the CSV
    # results and ensure there are no compilation errors or runtime errors in the status row. Also print the logs and
@ -78,6 +86,7 @@ for benchmark in spec_benchmarks:
    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results --extract "Base Status" --keep-failed %{{temp}}/result/*.train.csv > %{{temp}}/status || ! cat %{{temp}}/result/*.log')
    print(f'RUN: ! grep -E "CE|RE" %{{temp}}/status || ! cat %{{temp}}/result/*.log')

-    # If there were no errors, parse the results into LNT-compatible format and print them.
+    # If there were no errors, parse the SPEC results and the `time` output into LNT-compatible format and print them.
    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %{{temp}}/result/*.train.csv --output-format=lnt > %{{temp}}/results.lnt')
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-time-output %{{temp}}/time.txt --benchmark {benchmark} --extract instructions max_rss cycles peak_memory >> %{{temp}}/results.lnt')
    print(f'RUN: cat %{{temp}}/results.lnt')
--- a/libcxx/utils/parse-time-output
+++ b/libcxx/utils/parse-time-output
@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+import sys
+
+def main(argv):
+    parser = argparse.ArgumentParser(
+        prog='parse-time-output',
+        description='Parse the output of /usr/bin/time and output it in LNT-compatible format.')
+    parser.add_argument('input_file', type=argparse.FileType('r'), default='-',
+        help='Path of the file to extract results from. By default, stdin.')
+    parser.add_argument('--benchmark', type=str, required=True,
+        help='The name of the benchmark to use in the resulting LNT output.')
+    parser.add_argument('--extract', type=str, choices=['instructions', 'max_rss', 'cycles', 'peak_memory'], nargs='+',
+        help='The name of the metrics to extract from the time output.')
+    args = parser.parse_args(argv)
+
+    # Mapping from metric names to field names in the time output.
+    field_mapping = {
+        'instructions': 'instructions retired',
+        'max_rss': 'maximum resident set size',
+        'cycles': 'cycles elapsed',
+        'peak_memory': 'peak memory footprint',
+    }
+    to_extract = [field_mapping[e] for e in args.extract]
+
+    metrics = {}
+    for line in args.input_file:
+        match = re.match(r'\s*(\d+)\s+(\w+.*)', line)
+        if match is not None:
+            time_desc = match.group(2)
+            for metric, desc in field_mapping.items():
+                if time_desc == desc:
+                    metrics[metric] = int(match.group(1))
+                    break
+
+    for metric, value in metrics.items():
+        print(f'{args.benchmark}.{metric} {value}')
+
+# Script entry point: pass the command-line arguments (without the program name) to main().
+if __name__ == '__main__':
+    main(sys.argv[1:])