[libc++] Improve handling of runtime errors inside SPEC benchmarks

Previously, if a benchmark exited with an error, we would still report a successful run and produce a timing for that benchmark. After this patch, an error in the benchmark is treated as a failed LIT test, and no benchmark data is produced for it.
2025-09-25 12:54:31 -04:00 · 2025-09-25 12:54:31 -04:00 · 62450ba905
commit 62450ba905
parent 777eea0732
2 changed files with 17 additions and 7 deletions
--- a/libcxx/test/benchmarks/spec.gen.py
+++ b/libcxx/test/benchmarks/spec.gen.py
@@ -72,7 +72,12 @@ for benchmark in spec_benchmarks:
    print(f'RUN: %{{spec_dir}}/bin/runcpu --config %T/spec-config.cfg --size train --output-root %T --rebuild {benchmark}')
    print(f'RUN: rm -rf %T/benchspec') # remove the temporary directory, which can become quite large

-    # Parse the results into a LNT-compatible format. This also errors out if there are no CSV files, which
-    # means that the benchmark didn't run properly (the `runcpu` command above never reports a failure).
-    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt || ! cat %T/result/*.log')
+    # The `runcpu` command above doesn't fail even if the benchmark fails to run. To determine failure, parse the CSV
+    # results and ensure there are no compilation errors or runtime errors in the status row. Also print the logs and
+    # fail if there are no CSV files at all, which implies a SPEC error.
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results --extract "Base Status" --keep-failed %T/result/*.train.csv > %T/status || ! cat %T/result/*.log')
+    print(f'RUN: ! grep -E "CE|RE" %T/status || ! cat %T/result/*.log')
+
+    # If there were no errors, parse the results into LNT-compatible format and print them.
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt')
    print(f'RUN: cat %T/results.lnt')
--- a/libcxx/utils/parse-spec-results
+++ b/libcxx/utils/parse-spec-results
@@ -58,7 +58,10 @@ def main(argv):
             'sure to use appropriate quoting for header names that contain spaces. This option only makes sense '
             'when the output format is CSV.')
    parser.add_argument('--keep-not-run', action='store_true',
-        help='Keep entries whose \'Base Status\' is marked as \'NR\', aka \'Not Run\'. By default, such entries are discarded.')
+        help='Keep entries whose "Base Status" is marked as "NR" (aka "Not Run"). By default, such entries are discarded.')
+    parser.add_argument('--keep-failed', action='store_true',
+        help='Keep entries whose "Base Status" is marked as "CE" (aka "Compilation Error") or "RE" (aka "Runtime Error"). '
+             'By default, such entries are discarded.')
    args = parser.parse_args(argv)

    if args.table == 'full':
@@ -76,10 +79,12 @@ def main(argv):
        headers = parsed_headers
        rows.extend(parsed_rows)

-    # Remove rows that were not run unless we were asked to keep them
+    # Remove rows that were not run (or failed) unless we were asked to keep them
+    status = headers.index('Base Status')
    if not args.keep_not_run:
-        not_run = headers.index('Base Status')
-        rows = [row for row in rows if row[not_run] != 'NR']
+        rows = [row for row in rows if row[status] != 'NR']
+    if not args.keep_failed:
+        rows = [row for row in rows if row[status] not in ('CE', 'RE')]

    if args.extract is not None:
        if args.output_format != 'csv':