From 3a8eabeb3a1e75f42c8c13f5ca4fdc8a68f7e99d Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Thu, 12 Mar 2026 11:01:15 +0000 Subject: [PATCH] [BOLT][AArch64] Support block reordering beyond 1KB for FEAT_CMPBR. (#185443) Currently LongJmpPass::relaxLocalBranches bails early if the estimated size of a binary function is less than 32KB, assuming that the shortest branches are 16 bits. FEAT_CMPBR branches are only 11 bits, so a function between 1KB and 32KB skips relaxation and the fixup value for the cold branch target may go out of range. Decrease ShortestJumpBits from 16 to 11, which lowers ShortestJumpSpan from 32KB to 1KB. --- bolt/include/bolt/Passes/LongJmp.h | 2 +- .../compare-and-branch-reorder-blocks.S | 23 ++++++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h index 4b4935888599..4633d30104d4 100644 --- a/bolt/include/bolt/Passes/LongJmp.h +++ b/bolt/include/bolt/Passes/LongJmp.h @@ -64,7 +64,7 @@ class LongJmpPass : public BinaryFunctionPass { uint32_t NumSharedStubs{0}; /// The shortest distance for any branch instruction on AArch64. - static constexpr size_t ShortestJumpBits = 16; + static constexpr size_t ShortestJumpBits = 11; static constexpr size_t ShortestJumpSpan = 1ULL << (ShortestJumpBits - 1); /// The longest single-instruction branch. 
diff --git a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S index 0a5e75e9a437..c922da9058cd 100644 --- a/bolt/test/AArch64/compare-and-branch-reorder-blocks.S +++ b/bolt/test/AArch64/compare-and-branch-reorder-blocks.S @@ -10,8 +10,8 @@ # RUN: %clang %cflags -march=armv9-a+cmpbr -Wl,-q %s -o %t -DNUM_NOPS=256 # RUN: link_fdata --no-lbr %s %t %t.fdata -# RUN: not llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model --keep-nops 2>&1 \ -# RUN: | FileCheck %s --check-prefix=FIXUP_OUT_OF_RANGE +# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model --keep-nops +# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=BEYOND-1KB .globl reorder_blocks .type reorder_blocks, %function @@ -31,15 +31,15 @@ reorder_blocks: .rept NUM_NOPS nop .endr - mov x0, #2 - ret + mov x0, #2 + ret ## Force relocation mode. .reloc 0, R_AARCH64_NONE # CHECK: Disassembly of section .text: -# CHECK: <reorder_blocks>: +# CHECK: <reorder_blocks>: # CHECK-NEXT: {{.*}} cbgt x0, #0x0, 0x[[ADDR:[0-9a-f]+]] <{{.*}}> # CHECK: <.hot_exit>: # CHECK-NEXT: {{.*}} mov x0, #0x2 // =2 @@ -48,4 +48,15 @@ reorder_blocks: # CHECK-NEXT: [[ADDR]]: {{.*}} mov x0, #0x1 // =1 # CHECK-NEXT: {{.*}} ret -# FIXUP_OUT_OF_RANGE: error: fixup value out of range +# BEYOND-1KB: Disassembly of section .text: + +# BEYOND-1KB: <reorder_blocks>: +# BEYOND-1KB-NEXT: {{.*}} cblt x0, #0x1, 0x[[ADDR0:[0-9a-f]+]] <{{.*}}> +# BEYOND-1KB-NEXT: {{.*}} b 0x[[ADDR1:[0-9a-f]+]] <{{.*}}> +# BEYOND-1KB: <.hot_exit>: +# BEYOND-1KB-NEXT: [[ADDR0]]: {{.*}} nop +# BEYOND-1KB: {{.*}} mov x0, #0x2 // =2 +# BEYOND-1KB-NEXT: {{.*}} ret +# BEYOND-1KB: <.cold_exit>: +# BEYOND-1KB-NEXT: [[ADDR1]]: {{.*}} mov x0, #0x1 // =1 +# BEYOND-1KB-NEXT: {{.*}} ret