[libc++] Improve handling of runtime errors inside SPEC benchmarks

Previously, we would report a successful run even if the benchmark
exited with an error, and we would still produce a timing for it. After
this patch, we consider an error in the benchmark to be a failed LIT
test and we don't produce any benchmark data for it.
This commit is contained in:
Louis Dionne 2025-09-25 12:54:31 -04:00
parent 777eea0732
commit 62450ba905
2 changed files with 17 additions and 7 deletions

View File

@ -72,7 +72,12 @@ for benchmark in spec_benchmarks:
print(f'RUN: %{{spec_dir}}/bin/runcpu --config %T/spec-config.cfg --size train --output-root %T --rebuild {benchmark}')
print(f'RUN: rm -rf %T/benchspec') # remove the temporary directory, which can become quite large
# Parse the results into a LNT-compatible format. This also errors out if there are no CSV files, which
# means that the benchmark didn't run properly (the `runcpu` command above never reports a failure).
print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt || ! cat %T/result/*.log')
# The `runcpu` command above doesn't fail even if the benchmark fails to run. To determine failure, parse the CSV
# results and ensure there are no compilation errors or runtime errors in the status row. Also print the logs and
# fail if there are no CSV files at all, which implies a SPEC error.
print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results --extract "Base Status" --keep-failed %T/result/*.train.csv > %T/status || ! cat %T/result/*.log')
print(f'RUN: ! grep -E "CE|RE" %T/status || ! cat %T/result/*.log')
# If there were no errors, parse the results into LNT-compatible format and print them.
print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt')
print(f'RUN: cat %T/results.lnt')

View File

@ -58,7 +58,10 @@ def main(argv):
'sure to use appropriate quoting for header names that contain spaces. This option only makes sense '
'when the output format is CSV.')
parser.add_argument('--keep-not-run', action='store_true',
help='Keep entries whose \'Base Status\' is marked as \'NR\', aka \'Not Run\'. By default, such entries are discarded.')
help='Keep entries whose "Base Status" is marked as "NR" (aka "Not Run"). By default, such entries are discarded.')
parser.add_argument('--keep-failed', action='store_true',
help='Keep entries whose "Base Status" is marked as "CE" (aka "Compilation Error") or "RE" (aka "Runtime Error"). '
'By default, such entries are discarded.')
args = parser.parse_args(argv)
if args.table == 'full':
@ -76,10 +79,12 @@ def main(argv):
headers = parsed_headers
rows.extend(parsed_rows)
# Remove rows that were not run unless we were asked to keep them
# Remove rows that were not run (or failed) unless we were asked to keep them
status = headers.index('Base Status')
if not args.keep_not_run:
not_run = headers.index('Base Status')
rows = [row for row in rows if row[not_run] != 'NR']
rows = [row for row in rows if row[status] != 'NR']
if not args.keep_failed:
rows = [row for row in rows if row[status] not in ('CE', 'RE')]
if args.extract is not None:
if args.output_format != 'csv':