llvm-project/llvm/test/CodeGen/X86/callbr-asm.ll
Simon Tatham 56acb06bc6
[ARM,AArch64] Don't put BTI at asm goto branch targets (#141562)
In 'asm goto' statements ('callbr' in LLVM IR), you can specify one or
more labels / basic blocks in the containing function which the assembly
code might jump to. If you're also compiling with branch target
enforcement via BTI, then previously listing a basic block as a possible
jump destination of an asm goto would cause a BTI instruction to be
placed at the start of the block, in case the assembly code used an
_indirect_ branch instruction (i.e. to a destination address read from a
register) to jump to that location. Now it doesn't do that any more:
branches to destination labels from the assembly code are assumed to be
direct branches (to a relative offset encoded in the instruction), which
don't require a BTI at their destination.

This change was proposed in https://discourse.llvm.org/t/85845 and there
seemed to be no disagreement. The rationale is:

1. it brings clang's handling of asm goto in Arm and AArch64 in line
with gcc's, which didn't generate BTIs at the target labels in the first
place.

2. it improves performance in the Linux kernel, which uses a lot of 'asm
goto' in which the assembly language just contains a NOP, and the
label's address is saved elsewhere to let the kernel self-modify at run
time to swap between the original NOP and a direct branch to the label.
This allows hot code paths to be instrumented for debugging, at only the
cost of a NOP when the instrumentation is turned off, instead of the
larger cost of an indirect branch. In this situation a BTI is
unnecessary (if the branch happens it's direct), and since the code
paths are hot, also a noticeable performance hit.

Implementation:

`SelectionDAGBuilder::visitCallBr` is the place where 'asm goto' target
labels are handled. It calls `setIsInlineAsmBrIndirectTarget()` on each
target `MachineBasicBlock`. Previously it also called
`setMachineBlockAddressTaken()`, which made `hasAddressTaken()` return
true, which caused a BTI to be added in the Arm backends.

Now `visitCallBr` doesn't call `setMachineBlockAddressTaken()` any more
on asm goto targets, but `hasAddressTaken()` also checks the flag set by
`setIsInlineAsmBrIndirectTarget()`. So call sites that were using
`hasAddressTaken()` don't need to be modified. But the Arm backends
don't call `hasAddressTaken()` any more: instead they test two more
specific query functions that cover all the reasons `hasAddressTaken()`
might have returned true _except_ being an asm goto target.

Testing:

The new test `AArch64/callbr-asm-label-bti.ll` is testing the actual
change, where it expects not to see a `bti` instruction after
`[[LABEL]]`. The rest of the test changes are all churn, due to the
flags on basic blocks changing. Actual output code hasn't changed in any
of the existing tests, only comments and diagnostics.

Further work:

`RISCVIndirectBranchTracking.cpp` and `X86IndirectBranchTracking.cpp`
also call `hasAddressTaken()` in a way that might benefit from using the
same more specific check I've put in `ARMBranchTargets.cpp` and
`AArch64BranchTargets.cpp`. But I'm not sure of that, so in this commit
I've only changed the Arm backends, and left those alone.
2025-06-03 08:44:13 +01:00

198 lines
6.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -O3 -verify-machineinstrs | FileCheck %s
; Tests for using callbr as an asm-goto wrapper
; Test 1 - fallthrough label gets removed, but the fallthrough code that is
; unreachable due to asm ending on a jmp is still left in.
define i32 @test1(i32 %a) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $4, %eax
; CHECK-NEXT: #APP
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: jmp .LBB0_2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # %bb.1: # %normal
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB0_2: # Inline asm indirect target
; CHECK-NEXT: # %fail
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: retl
;
; IR under test: %a + 4 is handed to the inline asm as its register input
; (operand $0). Operand 1 is the label of %fail, printed through the 'l'
; modifier; the asm ends with an unconditional jmp to it (.LBB0_2 in the
; expected output). %normal is the callbr's fallthrough successor.
entry:
%0 = add i32 %a, 4
callbr void asm "xorl $0, $0; jmp ${1:l}", "r,!i,~{dirflag},~{fpsr},~{flags}"(i32 %0) to label %normal [label %fail]
; Fallthrough successor of the callbr: returns 0.
normal:
ret i32 0
; Label-operand successor of the callbr (target of the asm's jmp): returns 1.
fail:
ret i32 1
}
; Test 1b - Like test 1 but using `asm inteldialect`.
define i32 @test1b(i32 %a) {
; CHECK-LABEL: test1b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $4, %eax
; CHECK-NEXT: #APP
; CHECK-EMPTY:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: jmp .LBB1_2
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # %bb.1: # %normal
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB1_2: # Inline asm indirect target
; CHECK-NEXT: # %fail
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: retl
;
; IR under test: same shape as @test1, but the asm string is written in Intel
; dialect ("xor" with no size suffix, marked `inteldialect`). The expected
; output above still shows AT&T-style "xorl %eax, %eax", and has an empty line
; just inside each of the #APP / #NO_APP markers.
entry:
%0 = add i32 %a, 4
callbr void asm inteldialect "xor $0, $0; jmp ${1:l}", "r,!i,~{dirflag},~{fpsr},~{flags}"(i32 %0) to label %normal [label %fail]
; Fallthrough successor of the callbr: returns 0.
normal:
ret i32 0
; Label-operand successor of the callbr (target of the asm's jmp): returns 1.
fail:
ret i32 1
}
; Test 2 - callbr terminates an unreachable block, function gets simplified
; to a trivial zero return.
define i32 @test2(i32 %a) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retl
;
; IR under test: %entry branches straight to %normal, and no block in this
; function branches to %unreachableasm, so the callbr below is never reached.
; The expected output contains only the "return 0" path: no #APP section and
; no code for %fail.
entry:
br label %normal
; No predecessors: this block and its `sideeffect` asm are expected to be
; dropped from the generated code.
unreachableasm:
%0 = add i32 %a, 4
callbr void asm sideeffect "xorl $0, $0; jmp ${1:l}", "r,!i,~{dirflag},~{fpsr},~{flags}"(i32 %0) to label %normal [label %fail]
; Reached from %entry (and named as the callbr's fallthrough): returns 0.
normal:
ret i32 0
; Only referenced as the label operand of the unreachable callbr: returns 1.
fail:
ret i32 1
}
; Test 3 - asm-goto implements a loop. The loop gets recognized, but many loop
; transforms fail due to canonicalization having callbr exceptions. Trivial
; blocks at labels 1 and 3 also don't get simplified due to callbr.
define i32 @test3(i32 %a) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .LBB3_1: # Inline asm indirect target
; CHECK-NEXT: # %label01
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB3_2 Depth 2
; CHECK-NEXT: # Child Loop BB3_3 Depth 3
; CHECK-NEXT: # Child Loop BB3_4 Depth 4
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: .LBB3_2: # Inline asm indirect target
; CHECK-NEXT: # %label02
; CHECK-NEXT: # Parent Loop BB3_1 Depth=1
; CHECK-NEXT: # => This Loop Header: Depth=2
; CHECK-NEXT: # Child Loop BB3_3 Depth 3
; CHECK-NEXT: # Child Loop BB3_4 Depth 4
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: addl $4, {{[0-9]+}}(%esp)
; CHECK-NEXT: .LBB3_3: # Inline asm indirect target
; CHECK-NEXT: # %label03
; CHECK-NEXT: # Parent Loop BB3_1 Depth=1
; CHECK-NEXT: # Parent Loop BB3_2 Depth=2
; CHECK-NEXT: # => This Loop Header: Depth=3
; CHECK-NEXT: # Child Loop BB3_4 Depth 4
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: .LBB3_4: # Inline asm indirect target
; CHECK-NEXT: # %label04
; CHECK-NEXT: # Parent Loop BB3_1 Depth=1
; CHECK-NEXT: # Parent Loop BB3_2 Depth=2
; CHECK-NEXT: # Parent Loop BB3_3 Depth=3
; CHECK-NEXT: # => This Inner Loop Header: Depth=4
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: #APP
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: jmp .LBB3_2
; CHECK-NEXT: jmp .LBB3_3
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # %bb.5: # %normal0
; CHECK-NEXT: # in Loop: Header=BB3_4 Depth=4
; CHECK-NEXT: #APP
; CHECK-NEXT: jmp .LBB3_1
; CHECK-NEXT: jmp .LBB3_2
; CHECK-NEXT: jmp .LBB3_3
; CHECK-NEXT: jmp .LBB3_4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # %bb.6: # %normal1
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
;
; IR under test: a chain %label01 -> %label02 -> %label03 -> %label04 whose
; only back edges are the label operands of the two callbr instructions. The
; expected output reports these as four nested loops (Depth 1 through 4), one
; headed by each labelNN block.
;
; %a is spilled to a stack slot so the increment in %label02 is a memory update.
entry:
%a.addr = alloca i32, align 4
store i32 %a, ptr %a.addr, align 4
br label %label01
; Does nothing but branch on to %label02.
label01: ; preds = %normal0, %label04, %entry
br label %label02
; Adds 4 to the stack copy of %a each time this block is entered.
label02: ; preds = %normal0, %label04, %label01
%0 = load i32, ptr %a.addr, align 4
%add = add nsw i32 %0, 4
store i32 %add, ptr %a.addr, align 4
br label %label03
; Does nothing but branch on to %label04.
label03: ; preds = %normal0, %label04, %label02
br label %label04
; First asm goto: three label operands (%label01..%label03), each printed as a
; jmp target; falls through to %normal0.
label04: ; preds = %normal0, %label03
callbr void asm sideeffect "jmp ${0:l}; jmp ${1:l}; jmp ${2:l}", "!i,!i,!i,~{dirflag},~{fpsr},~{flags}"()
to label %normal0 [label %label01, label %label02, label %label03]
; Second asm goto: the same three labels plus %label04 itself; falls through
; to %normal1.
normal0: ; preds = %label04
callbr void asm sideeffect "jmp ${0:l}; jmp ${1:l}; jmp ${2:l}; jmp ${3:l}", "!i,!i,!i,!i,~{dirflag},~{fpsr},~{flags}"()
to label %normal1 [label %label01, label %label02, label %label03, label %label04]
; Exit: returns the current value of the stack copy of %a.
normal1: ; preds = %normal0
%1 = load i32, ptr %a.addr, align 4
ret i32 %1
}
; Test 4 - asm-goto label operand referenced both without and with the 'l' (ell) modifier.
define void @test4() {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: ja .LBB4_3
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # %bb.1: # %asm.fallthrough
; CHECK-NEXT: #APP
; CHECK-NEXT: ja .LBB4_3
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: .LBB4_3: # Inline asm indirect target
; CHECK-NEXT: # %quux
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: retl
;
; IR under test: two asm gotos that both name %quux as their label operand.
; The first prints the operand as plain "$0", the second as "${0:l}"; the
; expected output above is the same "ja .LBB4_3" for both.
;
; First asm goto: label operand written without a modifier.
entry:
callbr void asm sideeffect "ja $0", "!i,~{dirflag},~{fpsr},~{flags}"()
to label %asm.fallthrough [label %quux]
; Second asm goto: label operand written with the 'l' modifier.
asm.fallthrough: ; preds = %entry
callbr void asm sideeffect "ja ${0:l}", "!i,~{dirflag},~{fpsr},~{flags}"()
to label %cleanup [label %quux]
; Target of both asm gotos; just branches on to %cleanup.
quux: ; preds = %asm.fallthrough, %entry
br label %cleanup
; Common exit. The expected output has no separate block for it: the single
; retl appears directly under the %quux label.
cleanup: ; preds = %asm.fallthrough, %quux
ret void
}