[HEXAGON] Fix corner cases for hwloops pass (#135439)
Add a check to the Hexagon hardware loops pass to make sure that Dist > 0 or Dist < 0 holds for the corresponding comparison cases. The change modifies the HexagonHardwareLoops pass to add runtime checks that end_value > initial_value for less-than comparisons and end_value < initial_value for greater-than comparisons. Fix for https://github.com/llvm/llvm-project/issues/133241 @androm3da @iajbar PTAL --------- Co-authored-by: aankit-quic <aankit@quicinc.com> (cherry picked from commit da8ce56c53fe6e34809ba0b310fa90257e230a89)
This commit is contained in:
parent
78f6719ca9
commit
89adc2d4f9
@ -731,6 +731,11 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
|
||||
Register IVReg,
|
||||
int64_t IVBump,
|
||||
Comparison::Kind Cmp) const {
|
||||
LLVM_DEBUG(llvm::dbgs() << "Loop: " << *Loop << "\n");
|
||||
LLVM_DEBUG(llvm::dbgs() << "Initial Value: " << *Start << "\n");
|
||||
LLVM_DEBUG(llvm::dbgs() << "End Value: " << *End << "\n");
|
||||
LLVM_DEBUG(llvm::dbgs() << "Inc/Dec Value: " << IVBump << "\n");
|
||||
LLVM_DEBUG(llvm::dbgs() << "Comparison: " << Cmp << "\n");
|
||||
// Cannot handle comparison EQ, i.e. while (A == B).
|
||||
if (Cmp == Comparison::EQ)
|
||||
return nullptr;
|
||||
@ -846,6 +851,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
|
||||
if (IVBump < 0) {
|
||||
std::swap(Start, End);
|
||||
IVBump = -IVBump;
|
||||
std::swap(CmpLess, CmpGreater);
|
||||
}
|
||||
// Cmp may now have a wrong direction, e.g. LEs may now be GEs.
|
||||
// Signedness, and "including equality" are preserved.
|
||||
@ -989,7 +995,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
|
||||
CountSR = 0;
|
||||
}
|
||||
|
||||
return new CountValue(CountValue::CV_Register, CountR, CountSR);
|
||||
const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
|
||||
Register MuxR = CountR;
|
||||
unsigned MuxSR = CountSR;
|
||||
// For the loop count to be a valid unsigned number, CmpLess should imply
|
||||
// Dist >= 0. Similarly, CmpGreater should imply Dist < 0. We can skip the
|
||||
// check if the initial distance is zero and the comparison is LTu || LTEu.
|
||||
if (!(Start->isImm() && StartV == 0 && Comparison::isUnsigned(Cmp) &&
|
||||
CmpLess) &&
|
||||
(CmpLess || CmpGreater)) {
|
||||
// Generate:
|
||||
// DistCheck = CMP_GT DistR, 0 --> CmpLess
|
||||
// DistCheck = CMP_GT DistR, -1 --> CmpGreater
|
||||
Register DistCheckR = MRI->createVirtualRegister(PredRC);
|
||||
const MCInstrDesc &DistCheckD = TII->get(Hexagon::C2_cmpgti);
|
||||
BuildMI(*PH, InsertPos, DL, DistCheckD, DistCheckR)
|
||||
.addReg(DistR, 0, DistSR)
|
||||
.addImm((CmpLess) ? 0 : -1);
|
||||
|
||||
// Generate:
|
||||
// MUXR = MUX DistCheck, CountR, 1 --> CmpLess
|
||||
// MUXR = MUX DistCheck, 1, CountR --> CmpGreater
|
||||
MuxR = MRI->createVirtualRegister(IntRC);
|
||||
if (CmpLess) {
|
||||
const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxir);
|
||||
BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
|
||||
.addReg(DistCheckR)
|
||||
.addReg(CountR, 0, CountSR)
|
||||
.addImm(1);
|
||||
} else {
|
||||
const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxri);
|
||||
BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
|
||||
.addReg(DistCheckR)
|
||||
.addImm(1)
|
||||
.addReg(CountR, 0, CountSR);
|
||||
}
|
||||
MuxSR = 0;
|
||||
}
|
||||
|
||||
return new CountValue(CountValue::CV_Register, MuxR, MuxSR);
|
||||
}
|
||||
|
||||
/// Return true if the operation is invalid within hardware loop.
|
||||
|
277
llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir
Normal file
277
llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir
Normal file
@ -0,0 +1,277 @@
|
||||
# RUN: llc --mtriple=hexagon -run-pass=hwloops %s -o - | FileCheck %s
|
||||
|
||||
# CHECK-LABEL: name: f
|
||||
# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0
|
||||
# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1
|
||||
# CHECK-LABEL: name: g
|
||||
# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0
|
||||
# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1
|
||||
--- |
|
||||
@a = dso_local global [255 x ptr] zeroinitializer, align 8
|
||||
|
||||
; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none)
|
||||
define dso_local void @f(i32 noundef %m) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%cond = tail call i32 @llvm.smax.i32(i32 %m, i32 2)
|
||||
%0 = add nsw i32 %cond, -4
|
||||
%1 = shl i32 %cond, 3
|
||||
%cgep = getelementptr i8, ptr @a, i32 %1
|
||||
%cgep36 = bitcast ptr @a to ptr
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.body, %entry
|
||||
%lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ]
|
||||
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ]
|
||||
%sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ]
|
||||
%shr = lshr i32 %sh.0, 1
|
||||
%cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr
|
||||
store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5
|
||||
%lsr.iv.next = add nsw i32 %lsr.iv, 4
|
||||
%cmp1 = icmp samesign ult i32 %lsr.iv.next, 1073741836
|
||||
%cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32
|
||||
br i1 %cmp1, label %do.body, label %do.end, !llvm.loop !9
|
||||
|
||||
do.end: ; preds = %do.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none)
|
||||
define dso_local void @g(i32 noundef %m) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = add i32 %m, -4
|
||||
%1 = shl i32 %m, 3
|
||||
%cgep = getelementptr i8, ptr @a, i32 %1
|
||||
%cgep36 = bitcast ptr @a to ptr
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %do.body, %entry
|
||||
%lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ]
|
||||
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ]
|
||||
%sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ]
|
||||
%shr = lshr i32 %sh.0, 1
|
||||
%cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr
|
||||
store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5
|
||||
%lsr.iv.next = add i32 %lsr.iv, 4
|
||||
%cmp = icmp slt i32 %lsr.iv.next, 1073741836
|
||||
%cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32
|
||||
br i1 %cmp, label %do.body, label %do.end, !llvm.loop !11
|
||||
|
||||
do.end: ; preds = %do.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.smax.i32(i32, i32) #1
|
||||
|
||||
!llvm.module.flags = !{!0, !1, !2, !3}
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 8, !"PIC Level", i32 2}
|
||||
!2 = !{i32 7, !"PIE Level", i32 2}
|
||||
!3 = !{i32 7, !"frame-pointer", i32 2}
|
||||
!5 = !{!6, !6, i64 0}
|
||||
!6 = !{!"any pointer", !7, i64 0}
|
||||
!7 = !{!"omnipotent char", !8, i64 0}
|
||||
!8 = !{!"Simple C/C++ TBAA"}
|
||||
!9 = distinct !{!9, !10}
|
||||
!10 = !{!"llvm.loop.mustprogress"}
|
||||
!11 = distinct !{!11, !10}
|
||||
|
||||
...
|
||||
---
|
||||
name: f
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
noPhis: false
|
||||
isSSA: true
|
||||
noVRegs: false
|
||||
hasFakeUses: false
|
||||
callsEHReturn: false
|
||||
callsUnwindInit: false
|
||||
hasEHScopes: false
|
||||
hasEHFunclets: false
|
||||
isOutlined: false
|
||||
debugInstrRef: false
|
||||
failsVerification: false
|
||||
tracksDebugUserValues: false
|
||||
registers:
|
||||
- { id: 0, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 1, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 2, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 3, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 4, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 5, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 6, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 7, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 8, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 9, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 10, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 11, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 12, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 13, class: predregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 14, class: predregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 15, class: intregs, preferred-register: '', flags: [ ] }
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '%9' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 1
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
functionContext: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
hasTailCall: false
|
||||
isCalleeSavedInfoValid: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
entry_values: []
|
||||
callSites: []
|
||||
debugValueSubstitutions: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0
|
||||
|
||||
%9:intregs = COPY $r0
|
||||
%11:intregs = A2_tfrsi 2
|
||||
%12:intregs = A2_max %9, %11
|
||||
%0:intregs = nsw A2_addi %12, -4
|
||||
%1:intregs = S4_addi_asl_ri @a, %12, 3
|
||||
%2:intregs = A2_tfrsi @a
|
||||
%10:intregs = A2_tfrsi 256
|
||||
|
||||
bb.1.do.body:
|
||||
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
|
||||
%3:intregs = PHI %1, %bb.0, %8, %bb.1
|
||||
%4:intregs = PHI %0, %bb.0, %7, %bb.1
|
||||
%5:intregs = PHI %10, %bb.0, %15, %bb.1
|
||||
%15:intregs = S2_extractu %5, 8, 1
|
||||
S4_storeri_rr %2, %15, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5)
|
||||
%7:intregs = nsw A2_addi %4, 4
|
||||
%13:predregs = C2_cmpgtui %7, 1073741835
|
||||
%8:intregs = A2_addi %3, 32
|
||||
J2_jumpf %13, %bb.1, implicit-def dead $pc
|
||||
J2_jump %bb.2, implicit-def dead $pc
|
||||
|
||||
bb.2.do.end:
|
||||
PS_jmpret $r31, implicit-def dead $pc
|
||||
|
||||
...
|
||||
---
|
||||
name: g
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
noPhis: false
|
||||
isSSA: true
|
||||
noVRegs: false
|
||||
hasFakeUses: false
|
||||
callsEHReturn: false
|
||||
callsUnwindInit: false
|
||||
hasEHScopes: false
|
||||
hasEHFunclets: false
|
||||
isOutlined: false
|
||||
debugInstrRef: false
|
||||
failsVerification: false
|
||||
tracksDebugUserValues: false
|
||||
registers:
|
||||
- { id: 0, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 1, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 2, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 3, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 4, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 5, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 6, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 7, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 8, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 9, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 10, class: intregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 11, class: predregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 12, class: predregs, preferred-register: '', flags: [ ] }
|
||||
- { id: 13, class: intregs, preferred-register: '', flags: [ ] }
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '%9' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 1
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
functionContext: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
hasTailCall: false
|
||||
isCalleeSavedInfoValid: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
entry_values: []
|
||||
callSites: []
|
||||
debugValueSubstitutions: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x80000000)
|
||||
liveins: $r0
|
||||
|
||||
%9:intregs = COPY $r0
|
||||
%0:intregs = A2_addi %9, -4
|
||||
%1:intregs = S4_addi_asl_ri @a, %9, 3
|
||||
%2:intregs = A2_tfrsi @a
|
||||
%10:intregs = A2_tfrsi 256
|
||||
|
||||
bb.1.do.body:
|
||||
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
|
||||
|
||||
%3:intregs = PHI %1, %bb.0, %8, %bb.1
|
||||
%4:intregs = PHI %0, %bb.0, %7, %bb.1
|
||||
%5:intregs = PHI %10, %bb.0, %13, %bb.1
|
||||
%13:intregs = S2_extractu %5, 8, 1
|
||||
S4_storeri_rr %2, %13, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5)
|
||||
%7:intregs = A2_addi %4, 4
|
||||
%11:predregs = C2_cmpgti %7, 1073741835
|
||||
%8:intregs = A2_addi %3, 32
|
||||
J2_jumpf %11, %bb.1, implicit-def dead $pc
|
||||
J2_jump %bb.2, implicit-def dead $pc
|
||||
|
||||
bb.2.do.end:
|
||||
PS_jmpret $r31, implicit-def dead $pc
|
||||
|
||||
...
|
@ -5,8 +5,9 @@
|
||||
; the same stage.
|
||||
|
||||
; CHECK-DAG: [[REG3:(r[0-9]+)]] = add([[REG1:(r[0-9]+)]],#-1)
|
||||
; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG1]],#-1)
|
||||
; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG3]])
|
||||
; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG4:(r[0-9]+)]],#-1)
|
||||
; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG2]])
|
||||
; CHECK-NOT: = [[REG3]]
|
||||
; CHECK-NOT: = [[REG2]]
|
||||
; CHECK: .LBB0_[[LOOP]]:
|
||||
; CHECK: }{{[ \t]*}}:endloop
|
||||
|
Loading…
x
Reference in New Issue
Block a user