
In 'asm goto' statements ('callbr' in LLVM IR), you can specify one or more labels / basic blocks in the containing function which the assembly code might jump to. If you're also compiling with branch target enforcement via BTI, then previously listing a basic block as a possible jump destination of an asm goto would cause a BTI instruction to be placed at the start of the block, in case the assembly code used an _indirect_ branch instruction (i.e. to a destination address read from a register) to jump to that location. Now it doesn't do that any more: branches to destination labels from the assembly code are assumed to be direct branches (to a relative offset encoded in the instruction), which don't require a BTI at their destination. This change was proposed in https://discourse.llvm.org/t/85845 and there seemed to be no disagreement. The rationale is: 1. it brings clang's handling of asm goto in Arm and AArch64 in line with gcc's, which didn't generate BTIs at the target labels in the first place. 2. it improves performance in the Linux kernel, which uses a lot of 'asm goto' in which the assembly language just contains a NOP, and the label's address is saved elsewhere to let the kernel self-modify at run time to swap between the original NOP and a direct branch to the label. This allows hot code paths to be instrumented for debugging, at only the cost of a NOP when the instrumentation is turned off, instead of the larger cost of an indirect branch. In this situation a BTI is unnecessary (if the branch happens it's direct), and since the code paths are hot, also a noticeable performance hit. Implementation: `SelectionDAGBuilder::visitCallBr` is the place where 'asm goto' target labels are handled. It calls `setIsInlineAsmBrIndirectTarget()` on each target `MachineBasicBlock`. Previously it also called `setMachineBlockAddressTaken()`, which made `hasAddressTaken()` return true, which caused a BTI to be added in the Arm backends. 
Now `visitCallBr` doesn't call `setMachineBlockAddressTaken()` any more on asm goto targets, but `hasAddressTaken()` also checks the flag set by `setIsInlineAsmBrIndirectTarget()`. So call sites that were using `hasAddressTaken()` don't need to be modified. But the Arm backends don't call `hasAddressTaken()` any more: instead they test two more specific query functions that cover all the reasons `hasAddressTaken()` might have returned true _except_ being an asm goto target. Testing: The new test `AArch64/callbr-asm-label-bti.ll` is testing the actual change, where it expects not to see a `bti` instruction after `[[LABEL]]`. The rest of the test changes are all churn, due to the flags on basic blocks changing. Actual output code hasn't changed in any of the existing tests, only comments and diagnostics. Further work: `RISCVIndirectBranchTracking.cpp` and `X86IndirectBranchTracking.cpp` also call `hasAddressTaken()` in a way that might benefit from using the same more specific check I've put in `ARMBranchTargets.cpp` and `AArch64BranchTargets.cpp`. But I'm not sure of that, so in this commit I've only changed the Arm backends, and left those alone.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-unknown -verify-machineinstrs %s \
; RUN: -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64LE
; RUN: llc -mtriple=powerpc64-unknown-linux-unknown -verify-machineinstrs %s \
; RUN: -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64BE
;; Clobbering the link register (lr) in inline asm must force the compiler
;; to preserve lr across the asm: expect mflr/mtlr plus a stack save slot
;; in the prologue/epilogue, even though the asm body is empty.
define dso_local void @ClobberLR() local_unnamed_addr #0 {
; PPC64LE-LABEL: ClobberLR:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mflr r0
; PPC64LE-NEXT: stdu r1, -32(r1)
; PPC64LE-NEXT: std r0, 48(r1)
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: addi r1, r1, 32
; PPC64LE-NEXT: ld r0, 16(r1)
; PPC64LE-NEXT: mtlr r0
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: ClobberLR:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: mflr r0
; PPC64BE-NEXT: stdu r1, -48(r1)
; PPC64BE-NEXT: std r0, 64(r1)
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: addi r1, r1, 48
; PPC64BE-NEXT: ld r0, 16(r1)
; PPC64BE-NEXT: mtlr r0
; PPC64BE-NEXT: blr
entry:
  tail call void asm sideeffect "", "~{lr}"()
  ret void
}
;; r5 is a volatile (caller-saved) GPR in the ELFv1/ELFv2 ABIs, so clobbering
;; it in inline asm should require no save/restore at all: just the asm
;; markers and a blr.
define dso_local void @ClobberR5() local_unnamed_addr #0 {
; PPC64LE-LABEL: ClobberR5:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: ClobberR5:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: blr
entry:
  tail call void asm sideeffect "", "~{r5}"()
  ret void
}
;; r15 is a callee-saved GPR, so clobbering it in inline asm must make the
;; function spill it before the asm and reload it afterwards; no lr handling
;; is needed since lr itself is untouched.
define dso_local void @ClobberR15() local_unnamed_addr #0 {
; PPC64LE-LABEL: ClobberR15:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: ClobberR15:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: std r15, -136(r1) # 8-byte Folded Spill
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
; PPC64BE-NEXT: blr
entry:
  tail call void asm sideeffect "", "~{r15}"()
  ret void
}
;; Test for INLINEASM_BR
;; 'asm goto' (callbr) that clobbers lr: the lr save/restore must still be
;; emitted around the INLINEASM_BR, and the indirect-target block
;; (%return_early) must keep an emitted label (.LBB3_2) since the asm may
;; branch to it.
define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 {
; PPC64LE-LABEL: ClobberLR_BR:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mflr r0
; PPC64LE-NEXT: stdu r1, -32(r1)
; PPC64LE-NEXT: std r0, 48(r1)
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: nop
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: .LBB3_1: # %return
; PPC64LE-NEXT: addi r1, r1, 32
; PPC64LE-NEXT: ld r0, 16(r1)
; PPC64LE-NEXT: mtlr r0
; PPC64LE-NEXT: blr
; PPC64LE-NEXT: .LBB3_2: # Inline asm indirect target
; PPC64LE-NEXT: # %return_early
; PPC64LE-NEXT: # Label of block must be emitted
; PPC64LE-NEXT: li r3, 0
; PPC64LE-NEXT: b .LBB3_1
;
; PPC64BE-LABEL: ClobberLR_BR:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: mflr r0
; PPC64BE-NEXT: stdu r1, -48(r1)
; PPC64BE-NEXT: std r0, 64(r1)
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: nop
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: .LBB3_1: # %return
; PPC64BE-NEXT: addi r1, r1, 48
; PPC64BE-NEXT: ld r0, 16(r1)
; PPC64BE-NEXT: mtlr r0
; PPC64BE-NEXT: blr
; PPC64BE-NEXT: .LBB3_2: # Inline asm indirect target
; PPC64BE-NEXT: # %return_early
; PPC64BE-NEXT: # Label of block must be emitted
; PPC64BE-NEXT: li r3, 0
; PPC64BE-NEXT: b .LBB3_1
entry:
  callbr void asm sideeffect "nop", "!i,~{lr}"()
          to label %return [label %return_early]

return_early:
  br label %return

return:
  %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ]
  ret i32 %retval.0
}
;; 'asm goto' (callbr) that clobbers only the volatile r5: no prologue or
;; spill is required, but the indirect-target block (%return_early) must
;; still get an emitted label (.LBB4_2) as a possible asm branch destination.
define dso_local signext i32 @ClobberR5_BR(i32 signext %in) #0 {
; PPC64LE-LABEL: ClobberR5_BR:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: nop
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: # %bb.1: # %return
; PPC64LE-NEXT: blr
; PPC64LE-NEXT: .LBB4_2: # Inline asm indirect target
; PPC64LE-NEXT: # %return_early
; PPC64LE-NEXT: # Label of block must be emitted
; PPC64LE-NEXT: li r3, 0
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: ClobberR5_BR:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: nop
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: # %bb.1: # %return
; PPC64BE-NEXT: blr
; PPC64BE-NEXT: .LBB4_2: # Inline asm indirect target
; PPC64BE-NEXT: # %return_early
; PPC64BE-NEXT: # Label of block must be emitted
; PPC64BE-NEXT: li r3, 0
; PPC64BE-NEXT: blr
entry:
  callbr void asm sideeffect "nop", "!i,~{r5}"()
          to label %return [label %return_early]

return_early:
  br label %return

return:
  %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ]
  ret i32 %retval.0
}
;; An output constraint that defines lr ("={lr}") must be treated like a
;; clobber for preservation purposes: lr is saved and restored around the
;; asm even though the result is unused.
define dso_local void @DefLR() local_unnamed_addr #0 {
; PPC64LE-LABEL: DefLR:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mflr r0
; PPC64LE-NEXT: stdu r1, -32(r1)
; PPC64LE-NEXT: std r0, 48(r1)
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: addi r1, r1, 32
; PPC64LE-NEXT: ld r0, 16(r1)
; PPC64LE-NEXT: mtlr r0
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: DefLR:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: mflr r0
; PPC64BE-NEXT: stdu r1, -48(r1)
; PPC64BE-NEXT: std r0, 64(r1)
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: addi r1, r1, 48
; PPC64BE-NEXT: ld r0, 16(r1)
; PPC64BE-NEXT: mtlr r0
; PPC64BE-NEXT: blr
entry:
  tail call i64 asm sideeffect "", "={lr}"()
  ret void
}
;; An early-clobber output of lr ("=&{lr}") must likewise force lr to be
;; saved and restored around the asm; codegen should match the plain
;; "={lr}" case above.
define dso_local void @EarlyClobberLR() local_unnamed_addr #0 {
; PPC64LE-LABEL: EarlyClobberLR:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mflr r0
; PPC64LE-NEXT: stdu r1, -32(r1)
; PPC64LE-NEXT: std r0, 48(r1)
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: addi r1, r1, 32
; PPC64LE-NEXT: ld r0, 16(r1)
; PPC64LE-NEXT: mtlr r0
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: EarlyClobberLR:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: mflr r0
; PPC64BE-NEXT: stdu r1, -48(r1)
; PPC64BE-NEXT: std r0, 64(r1)
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: addi r1, r1, 48
; PPC64BE-NEXT: ld r0, 16(r1)
; PPC64BE-NEXT: mtlr r0
; PPC64BE-NEXT: blr
entry:
  tail call i64 asm sideeffect "", "=&{lr}"()
  ret void
}
;; Clobbering lr plus two callee-saved GPRs (r15, r16) in one asm: lr gets
;; the usual save/restore and both GPRs are spilled and reloaded. Note the
;; LE and BE ABIs place the spill slots differently (negative offsets from
;; the incoming SP on LE vs. slots inside the allocated frame on BE).
define dso_local void @ClobberMulti() local_unnamed_addr #0 {
; PPC64LE-LABEL: ClobberMulti:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mflr r0
; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill
; PPC64LE-NEXT: std r16, -128(r1) # 8-byte Folded Spill
; PPC64LE-NEXT: stdu r1, -176(r1)
; PPC64LE-NEXT: std r0, 192(r1)
; PPC64LE-NEXT: #APP
; PPC64LE-NEXT: #NO_APP
; PPC64LE-NEXT: addi r1, r1, 176
; PPC64LE-NEXT: ld r0, 16(r1)
; PPC64LE-NEXT: ld r16, -128(r1) # 8-byte Folded Reload
; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
; PPC64LE-NEXT: mtlr r0
; PPC64LE-NEXT: blr
;
; PPC64BE-LABEL: ClobberMulti:
; PPC64BE: # %bb.0: # %entry
; PPC64BE-NEXT: mflr r0
; PPC64BE-NEXT: stdu r1, -192(r1)
; PPC64BE-NEXT: std r0, 208(r1)
; PPC64BE-NEXT: std r15, 56(r1) # 8-byte Folded Spill
; PPC64BE-NEXT: std r16, 64(r1) # 8-byte Folded Spill
; PPC64BE-NEXT: #APP
; PPC64BE-NEXT: #NO_APP
; PPC64BE-NEXT: ld r16, 64(r1) # 8-byte Folded Reload
; PPC64BE-NEXT: ld r15, 56(r1) # 8-byte Folded Reload
; PPC64BE-NEXT: addi r1, r1, 192
; PPC64BE-NEXT: ld r0, 16(r1)
; PPC64BE-NEXT: mtlr r0
; PPC64BE-NEXT: blr
entry:
  tail call void asm sideeffect "", "~{lr},~{r15},~{r16}"()
  ret void
}
;; All test functions are nounwind so no unwind tables obscure the checks.
attributes #0 = { nounwind }