[AArch64] Fix Windows prologue handling to pair more registers. (#170214)

Currently, there's code to suppress pairing, but we don't actually need
to suppress that; we just need to suppress the formation of
pre-decrement/post-increment instructions.

Pairing saves an instruction in some cases, and enables packed unwind in
some cases.
This commit is contained in:
Eli Friedman 2026-01-16 15:36:47 -08:00 committed by GitHub
parent 08bcd7cb00
commit da34f9bb45
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 84 additions and 91 deletions

View File

@ -1560,7 +1560,6 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile,
unsigned SpillCount, unsigned Reg1,
unsigned Reg2, bool NeedsWinCFI,
bool IsFirst,
const TargetRegisterInfo *TRI) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
@ -1586,12 +1585,9 @@ static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile,
: false;
// If pairing a GPR with LR, the pair can be described by the save_lrpair
// opcode. If this is the first register pair, it would end up with a
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
// if LR is paired with something else than the first register.
// The save_lrpair opcode requires the first register to be an odd one.
// opcode. The save_lrpair opcode requires the first register to be odd.
if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
(Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
(Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR)
return false;
return true;
}
@ -1604,12 +1600,10 @@ static bool invalidateRegisterPairing(bool SpillExtendedVolatile,
unsigned SpillCount, unsigned Reg1,
unsigned Reg2, bool UsesWinAAPCS,
bool NeedsWinCFI, bool NeedsFrameRecord,
bool IsFirst,
const TargetRegisterInfo *TRI) {
if (UsesWinAAPCS)
return invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
Reg1, Reg2, NeedsWinCFI, IsFirst,
TRI);
Reg1, Reg2, NeedsWinCFI, TRI);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@ -1779,21 +1773,20 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
!invalidateRegisterPairing(
SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
!invalidateRegisterPairing(SpillExtendedVolatile, SpillCount,
RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
!invalidateRegisterPairing(
SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
!invalidateRegisterPairing(SpillExtendedVolatile, SpillCount,
RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:

View File

@ -168,9 +168,16 @@ AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
// If the first store isn't right where we want SP then we can't fold the
// update in so create a normal arithmetic instruction instead.
//
// On Windows, some register pairs involving LR can't be folded because
// there isn't a corresponding unwind opcode.
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue() ||
(NeedsWinCFI &&
(NewOpc == AArch64::LDPXpost || NewOpc == AArch64::STPXpre) &&
RegInfo.getEncodingValue(MBBI->getOperand(0).getReg()) + 1 !=
RegInfo.getEncodingValue(MBBI->getOperand(1).getReg()))) {
// If we are destroying the frame, make sure we add the increment after the
// last frame operation.
if (FrameFlag == MachineInstr::FrameDestroy) {
@ -310,6 +317,11 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
// (to force a stp with predecrement) to match the packed unwind format,
// provided that there actually are any callee saved registers to merge the
// decrement with.
//
// Note that for certain paired saves, like "x19, lr", we can't actually
// emit a predecrement stp, but packed unwind still expects a separate stack
// adjustment.
//
// This is potentially marginally slower, but allows using the packed
// unwind format for functions that both have a local area and callee saved
// registers. Using the packed unwind format notably reduces the size of

View File

@ -143,10 +143,10 @@ define void @call_copy_pod() {
; CHECK-LABEL: call_copy_pod:
; CHECK: .seh_proc call_copy_pod
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: adrp x19, Pod
; CHECK-NEXT: add x19, x19, :lo12:Pod
@ -154,10 +154,10 @@ define void @call_copy_pod() {
; CHECK-NEXT: bl copy_pod
; CHECK-NEXT: stp d0, d1, [x19]
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: .seh_endepilogue
; CHECK-NEXT: ret
; CHECK-NEXT: .seh_endfunclet
@ -175,10 +175,8 @@ define void @call_copy_notcxx14aggregate() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .seh_stackalloc 32
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x19, 16
; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x30, 24
; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: adrp x19, NotCXX14Aggregate
; CHECK-NEXT: add x19, x19, :lo12:NotCXX14Aggregate
@ -188,10 +186,8 @@ define void @call_copy_notcxx14aggregate() {
; CHECK-NEXT: ldp d0, d1, [sp]
; CHECK-NEXT: stp d0, d1, [x19]
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x30, 24
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x19, 16
; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: .seh_stackalloc 32
; CHECK-NEXT: .seh_endepilogue
@ -211,10 +207,10 @@ define void @call_copy_notpod() {
; CHECK-LABEL: call_copy_notpod:
; CHECK: .seh_proc call_copy_notpod
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: adrp x19, NotPod
; CHECK-NEXT: add x19, x19, :lo12:NotPod
@ -222,10 +218,10 @@ define void @call_copy_notpod() {
; CHECK-NEXT: bl copy_notpod
; CHECK-NEXT: stp x0, x1, [x19]
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: .seh_endepilogue
; CHECK-NEXT: ret
; CHECK-NEXT: .seh_endfunclet

View File

@ -9,10 +9,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: .seh_stackalloc 80
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x19, 16
; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x30, 24
; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: add x8, sp, #40
; CHECK-NEXT: mov w19, w0
@ -27,10 +25,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; CHECK-NEXT: cmp w19, w0
; CHECK-NEXT: cset w0, ls
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x30, 24
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x19, 16
; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: .seh_stackalloc 80
; CHECK-NEXT: .seh_endepilogue
@ -43,10 +39,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; GISEL-NEXT: // %bb.0:
; GISEL-NEXT: sub sp, sp, #80
; GISEL-NEXT: .seh_stackalloc 80
; GISEL-NEXT: str x19, [sp, #16] // 8-byte Spill
; GISEL-NEXT: .seh_save_reg x19, 16
; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill
; GISEL-NEXT: .seh_save_reg x30, 24
; GISEL-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
; GISEL-NEXT: .seh_save_lrpair x19, 16
; GISEL-NEXT: .seh_endprologue
; GISEL-NEXT: add x8, sp, #40
; GISEL-NEXT: mov w19, w0
@ -61,10 +55,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; GISEL-NEXT: cmp w19, w0
; GISEL-NEXT: cset w0, ls
; GISEL-NEXT: .seh_startepilogue
; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload
; GISEL-NEXT: .seh_save_reg x30, 24
; GISEL-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; GISEL-NEXT: .seh_save_reg x19, 16
; GISEL-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
; GISEL-NEXT: .seh_save_lrpair x19, 16
; GISEL-NEXT: add sp, sp, #80
; GISEL-NEXT: .seh_stackalloc 80
; GISEL-NEXT: .seh_endepilogue

View File

@ -7,19 +7,19 @@ define dso_local i32 @func(ptr %g, i32 %a) "sign-return-address"="non-leaf" "sig
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: hint #27
; CHECK-NEXT: .seh_pac_sign_lr
; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: blr x0
; CHECK-NEXT: mov w0, w19
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: hint #31
; CHECK-NEXT: .seh_pac_sign_lr
; CHECK-NEXT: .seh_endepilogue
@ -40,19 +40,19 @@ define dso_local i32 @func2(ptr %g, i32 %a) "sign-return-address"="non-leaf" "si
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: pacibsp
; CHECK-NEXT: .seh_pac_sign_lr
; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: blr x0
; CHECK-NEXT: mov w0, w19
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-NEXT: .seh_save_reg x30, 8
; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg_x x19, 16
; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: autibsp
; CHECK-NEXT: .seh_pac_sign_lr
; CHECK-NEXT: .seh_endepilogue

View File

@ -1,13 +1,13 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
# RUN: -stop-after=prologepilog | FileCheck %s
# Check that lr isn't paired with a GPR if it's the first pair, as
# that can't be described as a SEH opcode if combined with predecrement.
# Check that when LR is paired with a GPR, we don't combine it into a
# predecrement that can't be described as a SEH opcode.
# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16
# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -16
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 1
# CHECK-NEXT: frame-setup SEH_SaveReg 30, 8
# CHECK: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: frame-setup SEH_StackAlloc 16
# CHECK-NEXT: frame-setup STPXi killed $x19, killed $lr, $sp, 0
# CHECK-NEXT: frame-setup SEH_SaveRegP 19, 30, 0
# CHECK-NEXT: frame-setup SEH_PrologEnd
--- |

View File

@ -21,18 +21,18 @@
; }
; OBJ: DefRangeRegisterRelSym {
; OBJ: Kind: S_DEFRANGE_REGISTER_REL (0x1145)
; OBJ: BaseRegister: ARM64_SP (0x51)
; OBJ: HasSpilledUDTMember: No
; OBJ: OffsetInParent: 0
; OBJ: BasePointerOffset: 12
; OBJ: LocalVariableAddrRange {
; OBJ: OffsetStart: .text+0x14
; OBJ: ISectStart: 0x0
; OBJ: Range: 0x30
; OBJ: }
; OBJ: }
; OBJ: DefRangeRegisterRelSym {
; OBJ-NEXT: Kind: S_DEFRANGE_REGISTER_REL (0x1145)
; OBJ-NEXT: BaseRegister: ARM64_SP (0x51)
; OBJ-NEXT: HasSpilledUDTMember: No
; OBJ-NEXT: OffsetInParent: 0
; OBJ-NEXT: BasePointerOffset: 12
; OBJ-NEXT: LocalVariableAddrRange {
; OBJ-NEXT: OffsetStart: .text+0x10
; OBJ-NEXT: ISectStart: 0x0
; OBJ-NEXT: Range: 0x2C
; OBJ-NEXT: }
; OBJ-NEXT: }
; ModuleID = 't.cpp'
source_filename = "test/DebugInfo/COFF/register-variables-arm64.ll"