[RISCV] Support Push/Pop with Xqci (#134191)

The `qc.c.mienter` and `qc.c.mienter.nest` instructions broadly save only
the argument and temporary registers. The exceptions are that they
also save `fp` (`s0`) to construct a frame chain from the signal handler
to the frame below, and they also save `ra`. They are designed this way
so that (if needed) push and pop instructions can be used to save the
callee-saved registers.

This patch implements this optimisation, constructing the following
rather than a long sequence of `sw` and `lw` instructions for saving the
callee-saved registers:

```asm
  qc.c.mienter
  qc.cm.push {ra, s0-sN}, -M
  ...
  qc.cm.pop {ra, s0-sN}, M
  qc.c.mileaveret
```

There are some carefully-worked-out details here, especially around CFI
information. For any register saved by both `qc.c.mienter(.nest)` and
the push (which is `ra` and `s0` at most), we point the CFI information
at the version saved by `qc.c.mienter(.nest)`. This ensures the CFI
points at the same `fp` copy as a frame pointer unwinder would find.
This commit is contained in:
Sam Elliott 2025-04-28 08:12:45 -07:00 committed by GitHub
parent 258e1438c2
commit 3f46af98ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2607 additions and 287 deletions

View File

@ -568,6 +568,18 @@ getPushOrLibCallsSavedInfo(const MachineFunction &MF,
return PushOrLibCallsCSI;
for (const auto &CS : CSI) {
if (RVFI->useQCIInterrupt(MF)) {
// Some registers are saved by both `QC.C.MIENTER(.NEST)` and
// `QC.CM.PUSH(FP)`. In these cases, prioritise the CFI info that points
// to the versions saved by `QC.C.MIENTER(.NEST)` which is what FP
// unwinding would use.
const auto *FII = llvm::find_if(FixedCSRFIQCIInterruptMap, [&](auto P) {
return P.first == CS.getReg();
});
if (FII != std::end(FixedCSRFIQCIInterruptMap))
continue;
}
const auto *FII = llvm::find_if(
FixedCSRFIMap, [&](MCPhysReg P) { return P == CS.getReg(); });
if (FII != std::end(FixedCSRFIMap))
@ -866,12 +878,12 @@ static bool isPop(unsigned Opcode) {
}
static unsigned getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind,
bool HasFP) {
bool UpdateFP) {
switch (Kind) {
case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
return RISCV::CM_PUSH;
case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
return HasFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH;
return UpdateFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH;
default:
llvm_unreachable("Unhandled PushPopKind");
}
@ -914,7 +926,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Emit prologue for shadow call stack.
emitSCSPrologue(MF, MBB, MBBI, DL);
auto FirstFrameSetup = MBBI;
// We keep track of the first instruction because it might be a
// `(QC.)CM.PUSH(FP)`, and we may need to adjust the immediate rather than
// inserting an `addi sp, sp, -N*16`.
auto PossiblePush = MBBI;
// Skip past all callee-saved register spill instructions.
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
@ -988,19 +1003,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
}
if (RVFI->useQCIInterrupt(MF)) {
CFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
// The function starts with `QC.C.MIENTER(.NEST)`, so the `(QC.)CM.PUSH(FP)`
// could only be the next instruction.
++PossiblePush;
// Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
// could be. The PUSH will also get its own CFI metadata for its own
// modifications, which should come after the PUSH.
CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
CFIBuilder.buildOffset(CS.getReg(),
MFI.getObjectOffset(CS.getFrameIdx()));
} else if (RVFI->isPushable(MF) && FirstFrameSetup != MBB.end() &&
isPush(FirstFrameSetup->getOpcode())) {
PushCFIBuilder.buildOffset(CS.getReg(),
MFI.getObjectOffset(CS.getFrameIdx()));
}
if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
isPush(PossiblePush->getOpcode())) {
// Use available stack adjustment in push instruction to allocate additional
// stack space. Align the stack size down to a multiple of 16. This is
// needed for RVE.
// FIXME: Can we increase the stack size to a multiple of 16 instead?
uint64_t StackAdj =
std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48));
FirstFrameSetup->getOperand(1).setImm(StackAdj);
PossiblePush->getOperand(1).setImm(StackAdj);
StackSize -= StackAdj;
CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
@ -1305,17 +1330,21 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
CFIBuilder.buildRestore(CS.getReg());
// Update CFA offset. After CM_POP SP should be equal to CFA, so CFA
// offset should be a zero.
CFIBuilder.buildDefCFAOffset(0);
// Update CFA Offset. If this is a QCI interrupt function, there will be a
// leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
// getQCIInterruptStackSize() will be 0.
CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
}
}
emitSiFiveCLICPreemptibleRestores(MF, MBB, MBBI, DL);
// Deallocate stack if StackSize isn't a zero yet
// Deallocate stack if StackSize isn't a zero yet. If this is a QCI interrupt
// function, there will be a leftover offset which is deallocated by
// `QC.C.MILEAVERET`, otherwise getQCIInterruptStackSize() will be 0.
if (StackSize != 0)
deallocateStack(MF, MBB, MBBI, DL, StackSize, RealStackSize - StackSize);
deallocateStack(MF, MBB, MBBI, DL, StackSize,
RVFI->getQCIInterruptStackSize());
// Emit epilogue for shadow call stack.
emitSCSEpilogue(MF, MBB, MBBI, DL);
@ -1894,10 +1923,17 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
if (RVFI->useQCIInterrupt(MF)) {
RVFI->setQCIInterruptStackSize(QCIInterruptPushAmount);
} else if (RVFI->isPushable(MF)) {
}
if (RVFI->isPushable(MF)) {
// Determine how many GPRs we need to push and save it to RVFI.
unsigned PushedRegNum = getNumPushPopRegs(CSI);
if (PushedRegNum) {
// `QC.C.MIENTER(.NEST)` will save `ra` and `s0`, so we should only push if
// we want to push more than 2 registers. Otherwise, we should push if we
// want to push more than 0 registers.
unsigned OnlyPushIfMoreThan = RVFI->useQCIInterrupt(MF) ? 2 : 0;
if (PushedRegNum > OnlyPushIfMoreThan) {
RVFI->setRVPushRegs(PushedRegNum);
RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
}
@ -1923,8 +1959,9 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
CS.setFrameIdx(FrameIdx);
continue;
}
// TODO: QCI Interrupt + Push/Pop
} else if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) {
}
if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) {
const auto *FII = llvm::find_if(
FixedCSRFIMap, [&](MCPhysReg P) { return P == CS.getReg(); });
unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII);
@ -1937,6 +1974,9 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
else
Offset = -int64_t(RegNum + 1) * Size;
if (RVFI->useQCIInterrupt(MF))
Offset -= QCIInterruptPushAmount;
int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset);
assert(FrameIdx < 0);
CS.setFrameIdx(FrameIdx);
@ -1965,10 +2005,13 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
// because there are gaps which are reserved for future use.
MFI.CreateFixedSpillStackObject(
QCIInterruptPushAmount, -static_cast<int64_t>(QCIInterruptPushAmount));
} else if (RVFI->isPushable(MF)) {
}
if (RVFI->isPushable(MF)) {
int64_t QCIOffset = RVFI->useQCIInterrupt(MF) ? QCIInterruptPushAmount : 0;
// Allocate a fixed object that covers the full push.
if (int64_t PushSize = RVFI->getRVPushStackSize())
MFI.CreateFixedSpillStackObject(PushSize, -PushSize);
MFI.CreateFixedSpillStackObject(PushSize, -PushSize - QCIOffset);
} else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) {
int64_t LibCallFrameSize =
alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
@ -2003,13 +2046,15 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
for (auto [Reg, _Offset] : FixedCSRFIQCIInterruptMap)
MBB.addLiveIn(Reg);
// TODO: Handle QCI Interrupt + Push/Pop
} else if (RVFI->isPushable(*MF)) {
}
if (RVFI->isPushable(*MF)) {
// Emit CM.PUSH with base StackAdj & evaluate Push stack
unsigned PushedRegNum = RVFI->getRVPushRegs();
if (PushedRegNum > 0) {
// Use encoded number to represent registers to spill.
unsigned Opcode = getPushOpcode(RVFI->getPushPopKind(*MF), hasFP(*MF));
unsigned Opcode = getPushOpcode(
RVFI->getPushPopKind(*MF), hasFP(*MF) && !RVFI->useQCIInterrupt(*MF));
unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum);
MachineInstrBuilder PushBuilder =
BuildMI(MBB, MI, DL, TII.get(Opcode))
@ -2156,8 +2201,9 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
// QC.C.MILEAVERET which we already inserted to return.
assert(MI->getOpcode() == RISCV::QC_C_MILEAVERET &&
"Unexpected QCI Interrupt Return Instruction");
// TODO: Handle QCI + Push/Pop
} else if (RVFI->isPushable(*MF)) {
}
if (RVFI->isPushable(*MF)) {
unsigned PushedRegNum = RVFI->getRVPushRegs();
if (PushedRegNum > 0) {
unsigned Opcode = getPopOpcode(RVFI->getPushPopKind(*MF));

File diff suppressed because it is too large Load Diff