[RISCV] Optimize the spill/reload of segment registers (#153184)
The simplest way would be to: 1. save `vtype` to a scalar register, 2. insert a `vsetvli`, 3. use segment load/store, and 4. restore `vtype` via `vsetvl`. But `vsetvl` is usually slow, so this PR does not take that approach. Instead, we use wider whole-register load/store instructions whenever the register encoding is suitably aligned. We did the same optimization for COPY in https://github.com/llvm/llvm-project/pull/84455. We noticed this suboptimal spill/reload code when porting some video codec kernels to RVV intrinsics.
commit 17a98f85c2 (parent 2e74cc6c04)
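To make the idea concrete, here is a minimal, self-contained sketch of the grouping heuristic the patch applies, assuming LMUL_1 register tuples. The helper name `pickGroup` and the small driver are invented for illustration and are not part of the LLVM change; the opcodes named in the comments are the ones the patch actually selects.

```cpp
// Standalone sketch: pick the widest whole vector-register load/store
// (8/4/2/1 registers) that fits the remaining registers of the tuple and
// whose group size divides the encoding of the next register to handle.
#include <cstdio>

static unsigned pickGroup(unsigned NumRemaining, unsigned RegEncoding) {
  if (NumRemaining >= 8 && RegEncoding % 8 == 0)
    return 8; // e.g. VS8R_V / VL8RE8_V
  if (NumRemaining >= 4 && RegEncoding % 4 == 0)
    return 4; // e.g. VS4R_V / VL4RE8_V
  if (NumRemaining >= 2 && RegEncoding % 2 == 0)
    return 2; // e.g. VS2R_V / VL2RE8_V
  return 1;   // e.g. VS1R_V / VL1RE8_V
}

int main() {
  // A seven-register tuple starting at v1 (encoding 1), as spilled by
  // PseudoVSPILL7_M1 in the MIR test below, splits into 1 + 2 + 4 registers.
  unsigned Encoding = 1, Remaining = 7;
  while (Remaining != 0) {
    unsigned N = pickGroup(Remaining, Encoding);
    std::printf("whole-register access of %u reg(s) at v%u\n", N, Encoding);
    Encoding += N;
    Remaining -= N;
  }
  return 0;
}
```

Running the sketch prints a 1-, a 2-, and a 4-register access, matching the `VS1R_V`/`VS2R_V`/`VS4R_V` sequence the new lowering emits in the `zvlsseg_spill_1` MIR test at the end of this diff.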
@@ -382,7 +382,7 @@ void RISCVInstrInfo::copyPhysRegVector(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
     const TargetRegisterClass *RegClass) const {
-  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
   RISCVVType::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
   unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
 
@@ -444,13 +444,7 @@ void RISCVInstrInfo::copyPhysRegVector(
     return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
             RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
   };
-  auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
-                                   uint16_t Encoding) {
-    MCRegister Reg = RISCV::V0 + Encoding;
-    if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
-      return Reg;
-    return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
-  };
 
   while (I != NumRegs) {
     // For non-segment copying, we only do this once as the registers are always
     // aligned.
@@ -470,9 +464,9 @@ void RISCVInstrInfo::copyPhysRegVector(
 
     // Emit actual copying.
     // For reversed copying, the encoding should be decreased.
-    MCRegister ActualSrcReg = FindRegWithEncoding(
+    MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
         RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
-    MCRegister ActualDstReg = FindRegWithEncoding(
+    MCRegister ActualDstReg = TRI->findVRegWithEncoding(
         RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
 
     auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
 
@@ -389,9 +389,25 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
       .setMIFlag(Flag);
 }
 
-// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by
-// LMUL*VLENB bytes.
-void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
+static std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned>
+getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) {
+  if (NumRemaining >= 8 && RegEncoding % 8 == 0)
+    return {RISCVVType::LMUL_8, RISCV::VRM8RegClass,
+            IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V};
+  if (NumRemaining >= 4 && RegEncoding % 4 == 0)
+    return {RISCVVType::LMUL_4, RISCV::VRM4RegClass,
+            IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V};
+  if (NumRemaining >= 2 && RegEncoding % 2 == 0)
+    return {RISCVVType::LMUL_2, RISCV::VRM2RegClass,
+            IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V};
+  return {RISCVVType::LMUL_1, RISCV::VRRegClass,
+          IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V};
+}
+
+// Split a VSPILLx_Mx/VSPILLx_Mx pseudo into multiple whole register stores
+// separated by LMUL*VLENB bytes.
+void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
+                                                bool IsSpill) const {
   DebugLoc DL = II->getDebugLoc();
   MachineBasicBlock &MBB = *II->getParent();
   MachineFunction &MF = *MBB.getParent();
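With `getSpillReloadInfo`, a spill or reload that covers eight aligned registers collapses into a single whole-register access: for example, the NF=4, LMUL=2 tuple starting at `v8` in the `@_Z3foov` test below is now saved with one `vs8r.v` and restored with one `vl8r.v` instead of four `vs2r.v`/`vl2r.v` pairs.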
@ -403,47 +419,11 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
|
||||
auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
|
||||
unsigned NF = ZvlssegInfo->first;
|
||||
unsigned LMUL = ZvlssegInfo->second;
|
||||
assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
|
||||
unsigned Opcode, SubRegIdx;
|
||||
switch (LMUL) {
|
||||
default:
|
||||
llvm_unreachable("LMUL must be 1, 2, or 4.");
|
||||
case 1:
|
||||
Opcode = RISCV::VS1R_V;
|
||||
SubRegIdx = RISCV::sub_vrm1_0;
|
||||
break;
|
||||
case 2:
|
||||
Opcode = RISCV::VS2R_V;
|
||||
SubRegIdx = RISCV::sub_vrm2_0;
|
||||
break;
|
||||
case 4:
|
||||
Opcode = RISCV::VS4R_V;
|
||||
SubRegIdx = RISCV::sub_vrm4_0;
|
||||
break;
|
||||
}
|
||||
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
|
||||
"Unexpected subreg numbering");
|
||||
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
|
||||
"Unexpected subreg numbering");
|
||||
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
|
||||
"Unexpected subreg numbering");
|
||||
unsigned NumRegs = NF * LMUL;
|
||||
assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");
|
||||
|
||||
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
// Optimize for constant VLEN.
|
||||
if (auto VLEN = STI.getRealVLen()) {
|
||||
const int64_t VLENB = *VLEN / 8;
|
||||
int64_t Offset = VLENB * LMUL;
|
||||
STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
|
||||
} else {
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
|
||||
uint32_t ShiftAmount = Log2_32(LMUL);
|
||||
if (ShiftAmount != 0)
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
|
||||
.addReg(VL)
|
||||
.addImm(ShiftAmount);
|
||||
}
|
||||
|
||||
Register SrcReg = II->getOperand(0).getReg();
|
||||
Register Reg = II->getOperand(0).getReg();
|
||||
uint16_t RegEncoding = TRI->getEncodingValue(Reg);
|
||||
Register Base = II->getOperand(1).getReg();
|
||||
bool IsBaseKill = II->getOperand(1).isKill();
|
||||
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
@ -451,100 +431,63 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
|
||||
auto *OldMMO = *(II->memoperands_begin());
|
||||
LocationSize OldLoc = OldMMO->getSize();
|
||||
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
|
||||
TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
|
||||
auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
|
||||
for (unsigned I = 0; I < NF; ++I) {
|
||||
TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
|
||||
|
||||
Register VLENB = 0;
|
||||
unsigned PreHandledNum = 0;
|
||||
unsigned I = 0;
|
||||
while (I != NumRegs) {
|
||||
auto [LMulHandled, RegClass, Opcode] =
|
||||
getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
|
||||
auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
|
||||
bool IsLast = I + RegNumHandled == NumRegs;
|
||||
if (PreHandledNum) {
|
||||
Register Step;
|
||||
// Optimize for constant VLEN.
|
||||
if (auto VLEN = STI.getRealVLen()) {
|
||||
int64_t Offset = *VLEN / 8 * PreHandledNum;
|
||||
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
|
||||
} else {
|
||||
if (!VLENB) {
|
||||
VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
|
||||
}
|
||||
uint32_t ShiftAmount = Log2_32(PreHandledNum);
|
||||
if (ShiftAmount == 0)
|
||||
Step = VLENB;
|
||||
else {
|
||||
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
|
||||
.addReg(VLENB, getKillRegState(IsLast))
|
||||
.addImm(ShiftAmount);
|
||||
}
|
||||
}
|
||||
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
|
||||
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
|
||||
.addReg(Step, getKillRegState(Step != VLENB || IsLast));
|
||||
Base = NewBase;
|
||||
}
|
||||
|
||||
MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, II, DL, TII->get(Opcode))
|
||||
.addReg(ActualReg, getDefRegState(!IsSpill))
|
||||
.addReg(Base, getKillRegState(IsLast))
|
||||
.addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
|
||||
VRegSize * RegNumHandled));
|
||||
|
||||
// Adding implicit-use of super register to describe we are using part of
|
||||
// super register, that prevents machine verifier complaining when part of
|
||||
// subreg is undef, see comment in MachineVerifier::checkLiveness for more
|
||||
// detail.
|
||||
BuildMI(MBB, II, DL, TII->get(Opcode))
|
||||
.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I))
|
||||
.addReg(Base, getKillRegState(I == NF - 1))
|
||||
.addMemOperand(NewMMO)
|
||||
.addReg(SrcReg, RegState::Implicit);
|
||||
if (I != NF - 1)
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
|
||||
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
|
||||
.addReg(VL, getKillRegState(I == NF - 2));
|
||||
Base = NewBase;
|
||||
}
|
||||
II->eraseFromParent();
|
||||
}
|
||||
if (IsSpill)
|
||||
MIB.addReg(Reg, RegState::Implicit);
|
||||
|
||||
// Split a VSPILLx_Mx pseudo into multiple whole register loads separated by
|
||||
// LMUL*VLENB bytes.
|
||||
void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
|
||||
DebugLoc DL = II->getDebugLoc();
|
||||
MachineBasicBlock &MBB = *II->getParent();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
|
||||
const TargetInstrInfo *TII = STI.getInstrInfo();
|
||||
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
|
||||
|
||||
auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
|
||||
unsigned NF = ZvlssegInfo->first;
|
||||
unsigned LMUL = ZvlssegInfo->second;
|
||||
assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
|
||||
unsigned Opcode, SubRegIdx;
|
||||
switch (LMUL) {
|
||||
default:
|
||||
llvm_unreachable("LMUL must be 1, 2, or 4.");
|
||||
case 1:
|
||||
Opcode = RISCV::VL1RE8_V;
|
||||
SubRegIdx = RISCV::sub_vrm1_0;
|
||||
break;
|
||||
case 2:
|
||||
Opcode = RISCV::VL2RE8_V;
|
||||
SubRegIdx = RISCV::sub_vrm2_0;
|
||||
break;
|
||||
case 4:
|
||||
Opcode = RISCV::VL4RE8_V;
|
||||
SubRegIdx = RISCV::sub_vrm4_0;
|
||||
break;
|
||||
}
|
||||
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
|
||||
"Unexpected subreg numbering");
|
||||
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
|
||||
"Unexpected subreg numbering");
|
||||
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
|
||||
"Unexpected subreg numbering");
|
||||
|
||||
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
// Optimize for constant VLEN.
|
||||
if (auto VLEN = STI.getRealVLen()) {
|
||||
const int64_t VLENB = *VLEN / 8;
|
||||
int64_t Offset = VLENB * LMUL;
|
||||
STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
|
||||
} else {
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
|
||||
uint32_t ShiftAmount = Log2_32(LMUL);
|
||||
if (ShiftAmount != 0)
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
|
||||
.addReg(VL)
|
||||
.addImm(ShiftAmount);
|
||||
}
|
||||
|
||||
Register DestReg = II->getOperand(0).getReg();
|
||||
Register Base = II->getOperand(1).getReg();
|
||||
bool IsBaseKill = II->getOperand(1).isKill();
|
||||
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
auto *OldMMO = *(II->memoperands_begin());
|
||||
LocationSize OldLoc = OldMMO->getSize();
|
||||
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
|
||||
TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
|
||||
auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
|
||||
for (unsigned I = 0; I < NF; ++I) {
|
||||
BuildMI(MBB, II, DL, TII->get(Opcode),
|
||||
TRI->getSubReg(DestReg, SubRegIdx + I))
|
||||
.addReg(Base, getKillRegState(I == NF - 1))
|
||||
.addMemOperand(NewMMO);
|
||||
if (I != NF - 1)
|
||||
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
|
||||
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
|
||||
.addReg(VL, getKillRegState(I == NF - 2));
|
||||
Base = NewBase;
|
||||
PreHandledNum = RegNumHandled;
|
||||
RegEncoding += RegNumHandled;
|
||||
I += RegNumHandled;
|
||||
}
|
||||
II->eraseFromParent();
|
||||
}
|
||||
@@ -635,9 +578,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 
   // Handle spill/fill of synthetic register classes for segment operations to
-  // ensure correctness in the edge case one gets spilled. There are many
-  // possible optimizations here, but given the extreme rarity of such spills,
-  // we prefer simplicity of implementation for now.
+  // ensure correctness in the edge case one gets spilled.
   switch (MI.getOpcode()) {
   case RISCV::PseudoVSPILL2_M1:
   case RISCV::PseudoVSPILL2_M2:
@@ -650,7 +591,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   case RISCV::PseudoVSPILL6_M1:
   case RISCV::PseudoVSPILL7_M1:
   case RISCV::PseudoVSPILL8_M1:
-    lowerVSPILL(II);
+    lowerSegmentSpillReload(II, /*IsSpill=*/true);
     return true;
   case RISCV::PseudoVRELOAD2_M1:
   case RISCV::PseudoVRELOAD2_M2:
@@ -663,7 +604,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   case RISCV::PseudoVRELOAD6_M1:
   case RISCV::PseudoVRELOAD7_M1:
   case RISCV::PseudoVRELOAD8_M1:
-    lowerVRELOAD(II);
+    lowerSegmentSpillReload(II, /*IsSpill=*/false);
     return true;
   }
 
@@ -1052,3 +993,12 @@ bool RISCVRegisterInfo::getRegAllocationHints(
 
   return BaseImplRetVal;
 }
+
+Register
+RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
+                                        uint16_t Encoding) const {
+  MCRegister Reg = RISCV::V0 + Encoding;
+  if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
+    return Reg;
+  return getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
+}
 
@@ -107,8 +107,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
   int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
                                    int Idx) const override;
 
-  void lowerVSPILL(MachineBasicBlock::iterator II) const;
-  void lowerVRELOAD(MachineBasicBlock::iterator II) const;
+  void lowerSegmentSpillReload(MachineBasicBlock::iterator II,
+                               bool IsSpill) const;
 
   Register getFrameRegister(const MachineFunction &MF) const override;
 
@@ -144,6 +144,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
                                  const MachineFunction &MF, const VirtRegMap *VRM,
                                  const LiveRegMatrix *Matrix) const override;
 
+  Register findVRegWithEncoding(const TargetRegisterClass &RegClass,
+                                uint16_t Encoding) const;
+
   static bool isVRRegClass(const TargetRegisterClass *RC) {
     return RISCVRI::isVRegClass(RC->TSFlags) &&
            RISCVRI::getNF(RC->TSFlags) == 1;
 
@ -40,15 +40,7 @@ define void @_Z3foov() {
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
|
||||
; CHECK-NEXT: vle16.v v12, (a0)
|
||||
; CHECK-NEXT: addi a0, sp, 16
|
||||
; CHECK-NEXT: csrr a1, vlenb
|
||||
; CHECK-NEXT: slli a1, a1, 1
|
||||
; CHECK-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
|
||||
; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
|
||||
; CHECK-NEXT: #APP
|
||||
@ -59,15 +51,7 @@ define void @_Z3foov() {
|
||||
; CHECK-NEXT: addi a0, a0, 928
|
||||
; CHECK-NEXT: vmsbc.vx v0, v8, a0
|
||||
; CHECK-NEXT: addi a0, sp, 16
|
||||
; CHECK-NEXT: csrr a1, vlenb
|
||||
; CHECK-NEXT: slli a1, a1, 1
|
||||
; CHECK-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vl2r.v v14, (a0) # vscale x 16-byte Folded Reload
|
||||
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: slli a0, a0, 3
|
||||
; CHECK-NEXT: add a0, sp, a0
|
||||
|
@ -32,11 +32,7 @@ define void @last_chance_recoloring_failure() {
|
||||
; CHECK-NEXT: slli a0, a0, 3
|
||||
; CHECK-NEXT: add a0, sp, a0
|
||||
; CHECK-NEXT: addi a0, a0, 16
|
||||
; CHECK-NEXT: csrr a1, vlenb
|
||||
; CHECK-NEXT: slli a1, a1, 2
|
||||
; CHECK-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vs4r.v v20, (a0) # vscale x 32-byte Folded Spill
|
||||
; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
|
||||
; CHECK-NEXT: li s0, 36
|
||||
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
|
||||
; CHECK-NEXT: vfwadd.vv v16, v8, v12, v0.t
|
||||
@ -47,11 +43,7 @@ define void @last_chance_recoloring_failure() {
|
||||
; CHECK-NEXT: slli a0, a0, 3
|
||||
; CHECK-NEXT: add a0, sp, a0
|
||||
; CHECK-NEXT: addi a0, a0, 16
|
||||
; CHECK-NEXT: csrr a1, vlenb
|
||||
; CHECK-NEXT: slli a1, a1, 2
|
||||
; CHECK-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
|
||||
; CHECK-NEXT: add a0, a0, a1
|
||||
; CHECK-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
|
||||
; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
|
||||
; CHECK-NEXT: addi a0, sp, 16
|
||||
; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
|
||||
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
|
||||
@ -92,11 +84,7 @@ define void @last_chance_recoloring_failure() {
|
||||
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
|
||||
; SUBREGLIVENESS-NEXT: add a0, sp, a0
|
||||
; SUBREGLIVENESS-NEXT: addi a0, a0, 16
|
||||
; SUBREGLIVENESS-NEXT: csrr a1, vlenb
|
||||
; SUBREGLIVENESS-NEXT: slli a1, a1, 2
|
||||
; SUBREGLIVENESS-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
|
||||
; SUBREGLIVENESS-NEXT: add a0, a0, a1
|
||||
; SUBREGLIVENESS-NEXT: vs4r.v v20, (a0) # vscale x 32-byte Folded Spill
|
||||
; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
|
||||
; SUBREGLIVENESS-NEXT: li s0, 36
|
||||
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
|
||||
; SUBREGLIVENESS-NEXT: vfwadd.vv v16, v8, v12, v0.t
|
||||
@ -107,11 +95,7 @@ define void @last_chance_recoloring_failure() {
|
||||
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
|
||||
; SUBREGLIVENESS-NEXT: add a0, sp, a0
|
||||
; SUBREGLIVENESS-NEXT: addi a0, a0, 16
|
||||
; SUBREGLIVENESS-NEXT: csrr a1, vlenb
|
||||
; SUBREGLIVENESS-NEXT: slli a1, a1, 2
|
||||
; SUBREGLIVENESS-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
|
||||
; SUBREGLIVENESS-NEXT: add a0, a0, a1
|
||||
; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
|
||||
; SUBREGLIVENESS-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
|
||||
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
|
||||
; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
|
||||
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
|
||||
|
@ -41,14 +41,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -64,15 +61,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
|
||||
@ -108,14 +101,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
|
||||
@ -161,14 +151,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -184,15 +171,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
|
||||
@ -228,14 +211,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
|
||||
@ -283,17 +263,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -309,15 +284,11 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
|
||||
@ -353,17 +324,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
|
||||
@ -411,17 +377,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -437,15 +398,11 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
|
||||
@ -481,17 +438,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
|
||||
@ -540,23 +492,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg3e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
; SPILL-O2-NEXT: li a1, 6
|
||||
; SPILL-O2-NEXT: mul a0, a0, a1
|
||||
@ -571,21 +519,17 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: ret
|
||||
@ -621,23 +565,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: li a1, 6
|
||||
; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1
|
||||
|
@ -41,14 +41,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -64,15 +61,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
|
||||
@ -108,14 +101,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
|
||||
@ -161,14 +151,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -184,15 +171,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 16
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
|
||||
@ -228,14 +211,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
|
||||
@ -283,17 +263,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -309,15 +284,11 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
|
||||
@ -353,17 +324,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
|
||||
@ -411,17 +377,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
@ -437,15 +398,11 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
|
||||
@ -481,17 +438,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
|
||||
@ -540,23 +492,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-NEXT: vlseg3e32.v v8, (a0)
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-NEXT: #APP
|
||||
; SPILL-O2-NEXT: #NO_APP
|
||||
; SPILL-O2-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-NEXT: csrr a0, vlenb
|
||||
; SPILL-O2-NEXT: li a1, 6
|
||||
; SPILL-O2-NEXT: mul a0, a0, a1
|
||||
@ -571,21 +519,17 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0)
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 64
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VLEN128-NEXT: #APP
|
||||
; SPILL-O2-VLEN128-NEXT: #NO_APP
|
||||
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: li a1, 32
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
|
||||
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
|
||||
; SPILL-O2-VLEN128-NEXT: ret
|
||||
@ -621,23 +565,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
|
||||
; SPILL-O2-VSETVLI-NEXT: #APP
|
||||
; SPILL-O2-VSETVLI-NEXT: #NO_APP
|
||||
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
|
||||
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
|
||||
; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
|
||||
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
|
||||
; SPILL-O2-VSETVLI-NEXT: li a1, 6
|
||||
; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1
|
||||
|
@ -2,15 +2,15 @@
|
||||
# RUN: llc -mtriple=riscv64 -mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
|
||||
target triple = "riscv64"
|
||||
|
||||
define void @zvlsseg_spill(ptr %base, i64 %vl) {
|
||||
define void @zvlsseg_spill_0(ptr %base, i64 %vl) {
|
||||
ret void
|
||||
}
|
||||
define void @zvlsseg_spill_1(ptr %base, i64 %vl) {
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: zvlsseg_spill
|
||||
name: zvlsseg_spill_0
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, offset: 0, size: 64, alignment: 8, stack-id: scalable-vector }
|
||||
@ -18,7 +18,7 @@ body: |
|
||||
bb.0:
|
||||
liveins: $x10, $x11
|
||||
|
||||
; CHECK-LABEL: name: zvlsseg_spill
|
||||
; CHECK-LABEL: name: zvlsseg_spill_0
|
||||
; CHECK: liveins: $x10, $x11
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16
|
||||
@ -30,35 +30,22 @@ body: |
|
||||
; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
|
||||
; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
|
||||
; CHECK-NEXT: $x11 = ADDI $x2, 16
|
||||
; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
|
||||
; CHECK-NEXT: $x12 = PseudoReadVLENB
|
||||
; CHECK-NEXT: VS1R_V $v0, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: VS1R_V $v2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: VS1R_V $v3, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: VS1R_V $v4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: VS1R_V $v5, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x13 = SLLI $x12, 2
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
|
||||
; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
|
||||
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
|
||||
; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADDI $x2, 16
|
||||
; CHECK-NEXT: $x12 = PseudoReadVLENB
|
||||
; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $x12 = PseudoReadVLENB
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: $v8 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: $v9 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: $v10 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: $v11 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: $v12 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
|
||||
; CHECK-NEXT: $x12 = SLLI killed $x12, 2
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
|
||||
; CHECK-NEXT: $v13 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: $v12m2 = VL2RE8_V killed $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
|
||||
; CHECK-NEXT: VS1R_V killed $v8, killed renamable $x10
|
||||
; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB
|
||||
; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 3
|
||||
@ -75,3 +62,56 @@ body: |
|
||||
VS1R_V killed $v8, %0:gpr
|
||||
PseudoRET
|
||||
...
|
||||
|
||||
---
|
||||
name: zvlsseg_spill_1
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, offset: 0, size: 64, alignment: 8, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x10, $x11
|
||||
; CHECK-LABEL: name: zvlsseg_spill_1
|
||||
; CHECK: liveins: $x10, $x11
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB
|
||||
; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3
|
||||
; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
|
||||
; CHECK-NEXT: $v1_v2_v3_v4_v5_v6_v7 = PseudoVLSEG7E64_V_M1 undef $v1_v2_v3_v4_v5_v6_v7, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
|
||||
; CHECK-NEXT: $x11 = ADDI $x2, 16
|
||||
; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s64>) into %stack.0)
|
||||
; CHECK-NEXT: $x12 = PseudoReadVLENB
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
|
||||
; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
|
||||
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
|
||||
; CHECK-NEXT: VS4R_V $v4m4, killed $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
|
||||
; CHECK-NEXT: $x11 = ADDI $x2, 16
|
||||
; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
|
||||
; CHECK-NEXT: $x12 = PseudoReadVLENB
|
||||
; CHECK-NEXT: $x13 = SLLI $x12, 1
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
|
||||
; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
|
||||
; CHECK-NEXT: $x12 = SLLI killed $x12, 2
|
||||
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
|
||||
; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
|
||||
; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10
|
||||
; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB
|
||||
; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 3
|
||||
; CHECK-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
|
||||
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $x2, 16
|
||||
; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 16
|
||||
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
|
||||
; CHECK-NEXT: PseudoRET
|
||||
%0:gpr = COPY $x10
|
||||
%1:gprnox0 = COPY $x11
|
||||
$v1_v2_v3_v4_v5_v6_v7 = PseudoVLSEG7E64_V_M1 undef $v1_v2_v3_v4_v5_v6_v7, %0, %1, 6, 0
|
||||
PseudoVSPILL7_M1 killed renamable $v1_v2_v3_v4_v5_v6_v7, %stack.0 :: (store (<vscale x 7 x s64>) into %stack.0, align 8)
|
||||
renamable $v10_v11_v12_v13_v14_v15_v16 = PseudoVRELOAD7_M1 %stack.0 :: (load (<vscale x 7 x s64>) from %stack.0, align 8)
|
||||
VS1R_V killed $v10, %0:gpr
|
||||
PseudoRET
|
||||
...
|
||||
|