[CodeGenPrepare] sinkCmpExpression - don't sink larger than legal integer comparisons (#166778)
A generic alternative to #166564: assume that expanding an integer comparison will be expensive if its operands are wider than the largest legal integer type, and therefore avoid sinking the comparison when it is also used in the current basic block or by any PHI node. Fixes #166534.
This commit is contained in:
parent
bba40ab4bd
commit
5b20453062
@ -1839,7 +1839,8 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
|
||||
/// lose; some adjustment may be wanted there.
|
||||
///
|
||||
/// Return true if any changes are made.
|
||||
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
|
||||
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
|
||||
const DataLayout &DL) {
|
||||
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
|
||||
return false;
|
||||
|
||||
@ -1847,6 +1848,18 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
|
||||
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
|
||||
return false;
|
||||
|
||||
bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
|
||||
return isa<PHINode>(U) ||
|
||||
cast<Instruction>(U)->getParent() == Cmp->getParent();
|
||||
});
|
||||
|
||||
// Avoid sinking larger than legal integer comparisons unless it's ONLY used in
|
||||
// another BB.
|
||||
if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
|
||||
Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
|
||||
DL.getLargestLegalIntTypeSizeInBits())
|
||||
return false;
|
||||
|
||||
// Only insert a cmp in each block once.
|
||||
DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
|
||||
|
||||
@ -2224,7 +2237,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
|
||||
}
|
||||
|
||||
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
|
||||
if (sinkCmpExpression(Cmp, *TLI))
|
||||
if (sinkCmpExpression(Cmp, *TLI, *DL))
|
||||
return true;
|
||||
|
||||
if (combineToUAddWithOverflow(Cmp, ModifiedDT))
|
||||
|
||||
@ -232,7 +232,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
|
||||
ret i64 %Q
|
||||
}
|
||||
|
||||
; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
|
||||
; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic.
|
||||
|
||||
define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
|
||||
; RV32-LABEL: uaddo4:
|
||||
@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
|
||||
; RV32-NEXT: .cfi_offset s4, -24
|
||||
; RV32-NEXT: .cfi_offset s5, -28
|
||||
; RV32-NEXT: .cfi_offset s6, -32
|
||||
; RV32-NEXT: mv s5, a5
|
||||
; RV32-NEXT: mv s3, a1
|
||||
; RV32-NEXT: mv s1, a5
|
||||
; RV32-NEXT: mv s4, a1
|
||||
; RV32-NEXT: andi a1, a5, 1
|
||||
; RV32-NEXT: beqz a1, .LBB32_8
|
||||
; RV32-NEXT: beqz a1, .LBB32_6
|
||||
; RV32-NEXT: # %bb.1: # %t
|
||||
; RV32-NEXT: mv s0, a4
|
||||
; RV32-NEXT: mv s2, a3
|
||||
; RV32-NEXT: mv s1, a2
|
||||
; RV32-NEXT: mv s4, a0
|
||||
; RV32-NEXT: beq s3, a3, .LBB32_3
|
||||
; RV32-NEXT: mv s3, a3
|
||||
; RV32-NEXT: mv s2, a2
|
||||
; RV32-NEXT: mv s5, a0
|
||||
; RV32-NEXT: beq s4, a3, .LBB32_3
|
||||
; RV32-NEXT: # %bb.2: # %t
|
||||
; RV32-NEXT: sltu s6, s3, s2
|
||||
; RV32-NEXT: sltu s6, s4, s3
|
||||
; RV32-NEXT: j .LBB32_4
|
||||
; RV32-NEXT: .LBB32_3:
|
||||
; RV32-NEXT: sltu s6, s4, s1
|
||||
; RV32-NEXT: sltu s6, s5, s2
|
||||
; RV32-NEXT: .LBB32_4: # %t
|
||||
; RV32-NEXT: mv a0, s6
|
||||
; RV32-NEXT: call call
|
||||
; RV32-NEXT: beqz s6, .LBB32_8
|
||||
; RV32-NEXT: beqz s6, .LBB32_6
|
||||
; RV32-NEXT: # %bb.5: # %end
|
||||
; RV32-NEXT: sltu a1, s4, s1
|
||||
; RV32-NEXT: mv a0, a1
|
||||
; RV32-NEXT: beq s3, s2, .LBB32_7
|
||||
; RV32-NEXT: # %bb.6: # %end
|
||||
; RV32-NEXT: sltu a0, s3, s2
|
||||
; RV32-NEXT: .LBB32_7: # %end
|
||||
; RV32-NEXT: sub a2, s3, s2
|
||||
; RV32-NEXT: sub a3, s4, s1
|
||||
; RV32-NEXT: sub a2, a2, a1
|
||||
; RV32-NEXT: sw a3, 0(s0)
|
||||
; RV32-NEXT: sw a2, 4(s0)
|
||||
; RV32-NEXT: j .LBB32_9
|
||||
; RV32-NEXT: .LBB32_8: # %f
|
||||
; RV32-NEXT: mv a0, s5
|
||||
; RV32-NEXT: .LBB32_9: # %f
|
||||
; RV32-NEXT: sltu a0, s5, s2
|
||||
; RV32-NEXT: sub a1, s4, s3
|
||||
; RV32-NEXT: sub a2, s5, s2
|
||||
; RV32-NEXT: sub a1, a1, a0
|
||||
; RV32-NEXT: sw a2, 0(s0)
|
||||
; RV32-NEXT: sw a1, 4(s0)
|
||||
; RV32-NEXT: mv a0, s6
|
||||
; RV32-NEXT: j .LBB32_7
|
||||
; RV32-NEXT: .LBB32_6: # %f
|
||||
; RV32-NEXT: mv a0, s1
|
||||
; RV32-NEXT: .LBB32_7: # %f
|
||||
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
|
||||
@ -7,100 +7,64 @@
|
||||
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
|
||||
; SSE2-LABEL: pr166534:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movq (%rdi), %rax
|
||||
; SSE2-NEXT: movq 8(%rdi), %r8
|
||||
; SSE2-NEXT: movdqu (%rdi), %xmm0
|
||||
; SSE2-NEXT: movq (%rsi), %r9
|
||||
; SSE2-NEXT: movq 8(%rsi), %rdi
|
||||
; SSE2-NEXT: movdqu (%rsi), %xmm1
|
||||
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE2-NEXT: pmovmskb %xmm1, %esi
|
||||
; SSE2-NEXT: xorl %r10d, %r10d
|
||||
; SSE2-NEXT: xorl %eax, %eax
|
||||
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
|
||||
; SSE2-NEXT: sete %r10b
|
||||
; SSE2-NEXT: orq %r10, (%rdx)
|
||||
; SSE2-NEXT: sete %al
|
||||
; SSE2-NEXT: orq %rax, (%rdx)
|
||||
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
|
||||
; SSE2-NEXT: jne .LBB0_2
|
||||
; SSE2-NEXT: # %bb.1: # %if.then
|
||||
; SSE2-NEXT: xorq %r9, %rax
|
||||
; SSE2-NEXT: xorq %rdi, %r8
|
||||
; SSE2-NEXT: xorl %edx, %edx
|
||||
; SSE2-NEXT: orq %rax, %r8
|
||||
; SSE2-NEXT: sete %dl
|
||||
; SSE2-NEXT: orq %rdx, (%rcx)
|
||||
; SSE2-NEXT: orq %rax, (%rcx)
|
||||
; SSE2-NEXT: .LBB0_2: # %if.end
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE4-LABEL: pr166534:
|
||||
; SSE4: # %bb.0: # %entry
|
||||
; SSE4-NEXT: movq (%rdi), %rax
|
||||
; SSE4-NEXT: movq 8(%rdi), %r8
|
||||
; SSE4-NEXT: movdqu (%rdi), %xmm0
|
||||
; SSE4-NEXT: movq (%rsi), %r9
|
||||
; SSE4-NEXT: movq 8(%rsi), %rdi
|
||||
; SSE4-NEXT: movdqu (%rsi), %xmm1
|
||||
; SSE4-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE4-NEXT: xorl %esi, %esi
|
||||
; SSE4-NEXT: xorl %eax, %eax
|
||||
; SSE4-NEXT: ptest %xmm1, %xmm1
|
||||
; SSE4-NEXT: sete %sil
|
||||
; SSE4-NEXT: orq %rsi, (%rdx)
|
||||
; SSE4-NEXT: sete %al
|
||||
; SSE4-NEXT: orq %rax, (%rdx)
|
||||
; SSE4-NEXT: ptest %xmm1, %xmm1
|
||||
; SSE4-NEXT: jne .LBB0_2
|
||||
; SSE4-NEXT: # %bb.1: # %if.then
|
||||
; SSE4-NEXT: xorq %r9, %rax
|
||||
; SSE4-NEXT: xorq %rdi, %r8
|
||||
; SSE4-NEXT: xorl %edx, %edx
|
||||
; SSE4-NEXT: orq %rax, %r8
|
||||
; SSE4-NEXT: sete %dl
|
||||
; SSE4-NEXT: orq %rdx, (%rcx)
|
||||
; SSE4-NEXT: orq %rax, (%rcx)
|
||||
; SSE4-NEXT: .LBB0_2: # %if.end
|
||||
; SSE4-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: pr166534:
|
||||
; AVX2: # %bb.0: # %entry
|
||||
; AVX2-NEXT: movq (%rdi), %rax
|
||||
; AVX2-NEXT: movq 8(%rdi), %r8
|
||||
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
|
||||
; AVX2-NEXT: movq (%rsi), %rdi
|
||||
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
|
||||
; AVX2-NEXT: movq 8(%rsi), %rsi
|
||||
; AVX2-NEXT: xorl %r9d, %r9d
|
||||
; AVX2-NEXT: xorl %eax, %eax
|
||||
; AVX2-NEXT: vptest %xmm0, %xmm0
|
||||
; AVX2-NEXT: sete %r9b
|
||||
; AVX2-NEXT: orq %r9, (%rdx)
|
||||
; AVX2-NEXT: sete %al
|
||||
; AVX2-NEXT: orq %rax, (%rdx)
|
||||
; AVX2-NEXT: vptest %xmm0, %xmm0
|
||||
; AVX2-NEXT: jne .LBB0_2
|
||||
; AVX2-NEXT: # %bb.1: # %if.then
|
||||
; AVX2-NEXT: xorq %rdi, %rax
|
||||
; AVX2-NEXT: xorq %rsi, %r8
|
||||
; AVX2-NEXT: xorl %edx, %edx
|
||||
; AVX2-NEXT: orq %rax, %r8
|
||||
; AVX2-NEXT: sete %dl
|
||||
; AVX2-NEXT: orq %rdx, (%rcx)
|
||||
; AVX2-NEXT: orq %rax, (%rcx)
|
||||
; AVX2-NEXT: .LBB0_2: # %if.end
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: pr166534:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: movq (%rdi), %rax
|
||||
; AVX512-NEXT: movq 8(%rdi), %r8
|
||||
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
|
||||
; AVX512-NEXT: movq (%rsi), %r9
|
||||
; AVX512-NEXT: movq 8(%rsi), %rdi
|
||||
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
|
||||
; AVX512-NEXT: xorl %esi, %esi
|
||||
; AVX512-NEXT: xorl %eax, %eax
|
||||
; AVX512-NEXT: vptest %xmm0, %xmm0
|
||||
; AVX512-NEXT: sete %sil
|
||||
; AVX512-NEXT: orq %rsi, (%rdx)
|
||||
; AVX512-NEXT: sete %al
|
||||
; AVX512-NEXT: orq %rax, (%rdx)
|
||||
; AVX512-NEXT: vptest %xmm0, %xmm0
|
||||
; AVX512-NEXT: jne .LBB0_2
|
||||
; AVX512-NEXT: # %bb.1: # %if.then
|
||||
; AVX512-NEXT: xorq %r9, %rax
|
||||
; AVX512-NEXT: xorq %rdi, %r8
|
||||
; AVX512-NEXT: xorl %edx, %edx
|
||||
; AVX512-NEXT: orq %rax, %r8
|
||||
; AVX512-NEXT: sete %dl
|
||||
; AVX512-NEXT: orq %rdx, (%rcx)
|
||||
; AVX512-NEXT: orq %rax, (%rcx)
|
||||
; AVX512-NEXT: .LBB0_2: # %if.end
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user