[AArch64] Use correct regclass for spills of ZPR2/ZPR4 (#148806)
Commit a6293228fdd5aba8c04c63f02f3d017443feb3f2 forced the register class ZPR[24]StridedOrContiguous for spills/fills of ZPR2 and ZPR4. However, this may cause issues when the register class used for the fill is plain ZPR2/ZPR4, because that would allow the register allocator to pick `z1_z2`, which is not a supported register for ZPR2StridedOrContiguous: that class only supports tuples of the form `z0_z8`, `z1_z9` (strided) or `z0_z1`, `z2_z3` (contiguous, starting at a multiple of 2). For spills we could add a new register class that supports any of the tuple forms, but for consistency with the fills I've decided to use two pseudos instead. Fixes https://github.com/llvm/llvm-project/issues/148655
This commit is contained in:
parent
bda56023c9
commit
50e345ef95
@ -1591,18 +1591,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
"Non-writeback variants of STGloop / STZGloop should not "
|
||||
"survive past PrologEpilogInserter.");
|
||||
case AArch64::STR_ZZZZXI:
|
||||
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
|
||||
case AArch64::STR_ZZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
|
||||
case AArch64::STR_ZZXI:
|
||||
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
|
||||
case AArch64::STR_PPXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
|
||||
case AArch64::LDR_ZZZZXI:
|
||||
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
|
||||
case AArch64::LDR_ZZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
|
||||
case AArch64::LDR_ZZXI:
|
||||
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
|
||||
case AArch64::LDR_PPXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
|
||||
|
@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
|
||||
case AArch64::LDR_PXI:
|
||||
case AArch64::LDR_ZXI:
|
||||
case AArch64::LDR_ZZXI:
|
||||
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
|
||||
case AArch64::LDR_ZZZXI:
|
||||
case AArch64::LDR_ZZZZXI:
|
||||
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::LDRBui:
|
||||
case AArch64::LDRDui:
|
||||
@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
|
||||
case AArch64::STR_PXI:
|
||||
case AArch64::STR_ZXI:
|
||||
case AArch64::STR_ZZXI:
|
||||
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
|
||||
case AArch64::STR_ZZZXI:
|
||||
case AArch64::STR_ZZZZXI:
|
||||
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
|
||||
case AArch64::STRBBui:
|
||||
case AArch64::STRBui:
|
||||
case AArch64::STRDui:
|
||||
@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
||||
break;
|
||||
// SVE
|
||||
case AArch64::STR_ZZZZXI:
|
||||
case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
|
||||
case AArch64::LDR_ZZZZXI:
|
||||
case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
|
||||
Scale = TypeSize::getScalable(16);
|
||||
Width = TypeSize::getScalable(16 * 4);
|
||||
MinOffset = -256;
|
||||
@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
||||
MaxOffset = 253;
|
||||
break;
|
||||
case AArch64::STR_ZZXI:
|
||||
case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
|
||||
case AArch64::LDR_ZZXI:
|
||||
case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
|
||||
Scale = TypeSize::getScalable(16);
|
||||
Width = TypeSize::getScalable(16 * 2);
|
||||
MinOffset = -256;
|
||||
@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Twov2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
|
||||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
} else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register store without SVE store instructions");
|
||||
Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
|
||||
StackID = TargetStackID::ScalableVector;
|
||||
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register store without SVE store instructions");
|
||||
Opc = AArch64::STR_ZZXI;
|
||||
@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Fourv2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
|
||||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
} else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register store without SVE store instructions");
|
||||
Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
|
||||
StackID = TargetStackID::ScalableVector;
|
||||
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register store without SVE store instructions");
|
||||
Opc = AArch64::STR_ZZZZXI;
|
||||
@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Twov2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
|
||||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
} else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register load without SVE load instructions");
|
||||
Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
|
||||
StackID = TargetStackID::ScalableVector;
|
||||
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register load without SVE load instructions");
|
||||
Opc = AArch64::LDR_ZZXI;
|
||||
@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Fourv2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
|
||||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
} else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register load without SVE load instructions");
|
||||
Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
|
||||
StackID = TargetStackID::ScalableVector;
|
||||
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
|
||||
"Unexpected register load without SVE load instructions");
|
||||
Opc = AArch64::LDR_ZZZZXI;
|
||||
|
@ -2625,15 +2625,21 @@ let Predicates = [HasSVE_or_SME] in {
|
||||
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
|
||||
// AArch64ExpandPseudoInsts.
|
||||
let mayLoad = 1, hasSideEffects = 0 in {
|
||||
def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
|
||||
def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
}
|
||||
let mayStore = 1, hasSideEffects = 0 in {
|
||||
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
|
||||
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND
|
||||
--- |
|
||||
; ModuleID = '<stdin>'
|
||||
source_filename = "<stdin>"
|
||||
@ -14,13 +14,14 @@
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable }
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+sve" }
|
||||
attributes #1 = { nounwind "target-features"="+sve2p1" }
|
||||
attributes #2 = { nounwind "target-features"="+sve,+sme2" "aarch64_pstate_sm_enabled" }
|
||||
|
||||
...
|
||||
---
|
||||
@ -318,10 +319,10 @@ registers:
|
||||
- { id: 0, class: zpr2 }
|
||||
stack:
|
||||
liveins:
|
||||
- { reg: '$z0_z1', virtual-reg: '%0' }
|
||||
- { reg: '$z1_z2', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0_z1
|
||||
liveins: $z1_z2
|
||||
|
||||
; CHECK-LABEL: name: spills_fills_stack_id_zpr2
|
||||
; CHECK: stack:
|
||||
@ -329,12 +330,12 @@ body: |
|
||||
; CHECK-NEXT: stack-id: scalable-vector
|
||||
|
||||
; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
|
||||
; EXPAND: STR_ZXI $z0, $sp, 0
|
||||
; EXPAND: STR_ZXI $z1, $sp, 1
|
||||
; EXPAND: $z0 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 1
|
||||
; EXPAND: STR_ZXI $z1, $sp, 0
|
||||
; EXPAND: STR_ZXI $z2, $sp, 1
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z2 = LDR_ZXI $sp, 1
|
||||
|
||||
%0:zpr2 = COPY $z0_z1
|
||||
%0:zpr2 = COPY $z1_z2
|
||||
|
||||
$z0_z1_z2_z3 = IMPLICIT_DEF
|
||||
$z4_z5_z6_z7 = IMPLICIT_DEF
|
||||
@ -345,7 +346,7 @@ body: |
|
||||
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||
|
||||
$z0_z1 = COPY %0
|
||||
$z1_z2 = COPY %0
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
@ -439,10 +440,10 @@ registers:
|
||||
- { id: 0, class: zpr4 }
|
||||
stack:
|
||||
liveins:
|
||||
- { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
|
||||
- { reg: '$z1_z2_z3_z4', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0_z1_z2_z3
|
||||
liveins: $z1_z2_z3_z4
|
||||
|
||||
; CHECK-LABEL: name: spills_fills_stack_id_zpr4
|
||||
; CHECK: stack:
|
||||
@ -450,16 +451,16 @@ body: |
|
||||
; CHECK-NEXT: stack-id: scalable-vector
|
||||
|
||||
; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
|
||||
; EXPAND: STR_ZXI $z0, $sp, 0
|
||||
; EXPAND: STR_ZXI $z1, $sp, 1
|
||||
; EXPAND: STR_ZXI $z2, $sp, 2
|
||||
; EXPAND: STR_ZXI $z3, $sp, 3
|
||||
; EXPAND: $z0 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 1
|
||||
; EXPAND: $z2 = LDR_ZXI $sp, 2
|
||||
; EXPAND: $z3 = LDR_ZXI $sp, 3
|
||||
; EXPAND: STR_ZXI $z1, $sp, 0
|
||||
; EXPAND: STR_ZXI $z2, $sp, 1
|
||||
; EXPAND: STR_ZXI $z3, $sp, 2
|
||||
; EXPAND: STR_ZXI $z4, $sp, 3
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z2 = LDR_ZXI $sp, 1
|
||||
; EXPAND: $z3 = LDR_ZXI $sp, 2
|
||||
; EXPAND: $z4 = LDR_ZXI $sp, 3
|
||||
|
||||
%0:zpr4 = COPY $z0_z1_z2_z3
|
||||
%0:zpr4 = COPY $z1_z2_z3_z4
|
||||
|
||||
$z0_z1_z2_z3 = IMPLICIT_DEF
|
||||
$z4_z5_z6_z7 = IMPLICIT_DEF
|
||||
@ -470,7 +471,7 @@ body: |
|
||||
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||
|
||||
$z0_z1_z2_z3 = COPY %0
|
||||
$z1_z2_z3_z4 = COPY %0
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
|
Loading…
x
Reference in New Issue
Block a user