[LegalizeTypes] Expand UDIV/UREM by constant via chunk summation (#146238)
This patch improves the lowering of 128-bit unsigned division and remainder by constants (UDIV/UREM) by avoiding a fallback to libcall (__udivti3/__umodti3) for specific divisors. When a divisor D satisfies the condition (1 << ChunkWidth) % D == 1, the 128-bit value is split into fixed-width chunks (e.g., 30-bit) and summed before applying a smaller UDIV/UREM. This transformation is based on the "remainder by summing digits" trick described in Hacker’s Delight. This fixes #137514 for some constants.
This commit is contained in:
parent
582fa78753
commit
796b218edd
@ -8186,8 +8186,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
|
||||
|
||||
// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
|
||||
// then add in the carry.
|
||||
// TODO: If we can't split it in half, we might be able to split into 3 or
|
||||
// more pieces using a smaller bit width.
|
||||
if (HalfMaxPlus1.urem(Divisor).isOne()) {
|
||||
assert(!LL == !LH && "Expected both input halves or no input halves!");
|
||||
if (!LL)
|
||||
@ -8239,6 +8237,67 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
|
||||
DAG.getConstant(0, dl, HiLoVT));
|
||||
Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
|
||||
}
|
||||
} else {
|
||||
// If we cannot split in two halves, look for a smaller chunk width W
|
||||
// such that (1 << W) % Divisor == 1.
|
||||
unsigned BitWidth = VT.getScalarSizeInBits();
|
||||
unsigned BestChunkWidth = 0;
|
||||
|
||||
// Determine the legal scalar integer type for chunk operations.
|
||||
EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
|
||||
unsigned LegalWidth = LegalVT.getScalarSizeInBits();
|
||||
unsigned MaxChunk = std::min<unsigned>(LegalWidth, BitWidth);
|
||||
|
||||
// Search for I where 2^I % Divisor == 1
|
||||
for (unsigned I = MaxChunk, E = MaxChunk / 2; I > E; --I) {
|
||||
APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
|
||||
|
||||
if (Mod.isOne()) {
|
||||
// Ensure (NumChunks * MaxChunkValue) doesn't overflow LegalVT
|
||||
unsigned NumChunks = divideCeil(BitWidth, I);
|
||||
|
||||
// Ensure the sum won't overflow the hardware register (LegalWidth).
|
||||
// Summing N chunks adds ceil(log2(N)) extra carry bits to the width.
|
||||
// Safety check: Base Chunk Width (I) + Carry Bits <= Register Width.
|
||||
if (I + llvm::bit_width(NumChunks - 1) <= LegalWidth) {
|
||||
BestChunkWidth = I;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!BestChunkWidth)
|
||||
return false;
|
||||
|
||||
SDValue In =
|
||||
LL ? DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH) : N->getOperand(0);
|
||||
if (TrailingZeros) {
|
||||
// Save the shifted off bits if we need the remainder.
|
||||
if (Opcode != ISD::UDIV) {
|
||||
APInt Mask = APInt::getLowBitsSet(BitWidth, TrailingZeros);
|
||||
PartialRem =
|
||||
DAG.getNode(ISD::AND, dl, VT, In, DAG.getConstant(Mask, dl, VT));
|
||||
}
|
||||
EVT ShiftVT = getShiftAmountTy(VT, DAG.getDataLayout());
|
||||
In = DAG.getNode(ISD::SRL, dl, VT, In,
|
||||
DAG.getShiftAmountConstant(TrailingZeros, ShiftVT, dl));
|
||||
}
|
||||
SDValue TotalSum = DAG.getConstant(0, dl, LegalVT);
|
||||
SDValue Mask = DAG.getConstant(
|
||||
APInt::getLowBitsSet(LegalWidth, BestChunkWidth), dl, LegalVT);
|
||||
|
||||
for (unsigned I = 0; I < BitWidth; I += BestChunkWidth) {
|
||||
SDValue Shift = DAG.getShiftAmountConstant(I, VT, dl);
|
||||
SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift);
|
||||
// Truncate to LegalVT
|
||||
SDValue TruncChunk = DAG.getNode(ISD::TRUNCATE, dl, LegalVT, Chunk);
|
||||
// For the last chunk, we might not need a mask if it's smaller than
|
||||
// BestChunkWidth, but applying it is always safe.
|
||||
SDValue MaskedChunk =
|
||||
DAG.getNode(ISD::AND, dl, LegalVT, TruncChunk, Mask);
|
||||
TotalSum = DAG.getNode(ISD::ADD, dl, LegalVT, TotalSum, MaskedChunk);
|
||||
}
|
||||
Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, TotalSum);
|
||||
}
|
||||
|
||||
// If we didn't find a sum, we can't do the expansion.
|
||||
@ -8278,7 +8337,9 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
|
||||
if (TrailingZeros) {
|
||||
RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
|
||||
DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
|
||||
RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
|
||||
|
||||
SDValue PartialRemLo = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, PartialRem);
|
||||
RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRemLo);
|
||||
}
|
||||
Result.push_back(RemL);
|
||||
Result.push_back(DAG.getConstant(0, dl, HiLoVT));
|
||||
|
||||
@ -500,13 +500,20 @@ entry:
|
||||
define i128 @ui128_7(i128 %a, i128 %b) {
|
||||
; CHECK-SD-LABEL: ui128_7:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-SD-NEXT: mov w2, #7 // =0x7
|
||||
; CHECK-SD-NEXT: mov x3, xzr
|
||||
; CHECK-SD-NEXT: bl __umodti3
|
||||
; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-SD-NEXT: extr x8, x1, x0, #60
|
||||
; CHECK-SD-NEXT: and x9, x0, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: and x8, x8, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: add x8, x9, x8
|
||||
; CHECK-SD-NEXT: mov x9, #18725 // =0x4925
|
||||
; CHECK-SD-NEXT: movk x9, #9362, lsl #16
|
||||
; CHECK-SD-NEXT: add x8, x8, x1, lsr #56
|
||||
; CHECK-SD-NEXT: mov x1, xzr
|
||||
; CHECK-SD-NEXT: movk x9, #37449, lsl #32
|
||||
; CHECK-SD-NEXT: movk x9, #18724, lsl #48
|
||||
; CHECK-SD-NEXT: umulh x9, x8, x9
|
||||
; CHECK-SD-NEXT: lsr x9, x9, #1
|
||||
; CHECK-SD-NEXT: sub x9, x9, x9, lsl #3
|
||||
; CHECK-SD-NEXT: add x0, x8, x9
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: ui128_7:
|
||||
@ -579,13 +586,23 @@ entry:
|
||||
define i128 @ui128_100(i128 %a, i128 %b) {
|
||||
; CHECK-SD-LABEL: ui128_100:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-SD-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-SD-NEXT: mov w2, #100 // =0x64
|
||||
; CHECK-SD-NEXT: mov x3, xzr
|
||||
; CHECK-SD-NEXT: bl __umodti3
|
||||
; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-SD-NEXT: extr x8, x1, x0, #2
|
||||
; CHECK-SD-NEXT: lsr x9, x1, #2
|
||||
; CHECK-SD-NEXT: mov w10, #25 // =0x19
|
||||
; CHECK-SD-NEXT: extr x9, x9, x8, #60
|
||||
; CHECK-SD-NEXT: and x8, x8, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: and x9, x9, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: add x8, x8, x9
|
||||
; CHECK-SD-NEXT: mov x9, #62915 // =0xf5c3
|
||||
; CHECK-SD-NEXT: movk x9, #23592, lsl #16
|
||||
; CHECK-SD-NEXT: add x8, x8, x1, lsr #58
|
||||
; CHECK-SD-NEXT: mov x1, xzr
|
||||
; CHECK-SD-NEXT: movk x9, #49807, lsl #32
|
||||
; CHECK-SD-NEXT: movk x9, #10485, lsl #48
|
||||
; CHECK-SD-NEXT: umulh x9, x8, x9
|
||||
; CHECK-SD-NEXT: lsr x9, x9, #2
|
||||
; CHECK-SD-NEXT: msub x8, x9, x10, x8
|
||||
; CHECK-SD-NEXT: bfi x0, x8, #2, #62
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: ui128_100:
|
||||
@ -2556,7 +2573,8 @@ define <3 x i32> @uv3i32_7(<3 x i32> %d, <3 x i32> %e) {
|
||||
; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
|
||||
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
|
||||
; CHECK-SD-NEXT: sub v2.2s, v0.2s, v1.2s
|
||||
; CHECK-SD: add w8, w8, w9
|
||||
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 def $q0
|
||||
; CHECK-SD-NEXT: add w8, w8, w9
|
||||
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
|
||||
; CHECK-SD-NEXT: shrn v2.2s, v2.2d, #1
|
||||
; CHECK-SD-NEXT: add v1.2s, v2.2s, v1.2s
|
||||
@ -3079,34 +3097,30 @@ entry:
|
||||
define <2 x i128> @uv2i128_7(<2 x i128> %d, <2 x i128> %e) {
|
||||
; CHECK-SD-LABEL: uv2i128_7:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
|
||||
; CHECK-SD-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
|
||||
; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
|
||||
; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
|
||||
; CHECK-SD-NEXT: .cfi_offset w19, -8
|
||||
; CHECK-SD-NEXT: .cfi_offset w20, -16
|
||||
; CHECK-SD-NEXT: .cfi_offset w21, -24
|
||||
; CHECK-SD-NEXT: .cfi_offset w22, -32
|
||||
; CHECK-SD-NEXT: .cfi_offset w30, -48
|
||||
; CHECK-SD-NEXT: mov x19, x3
|
||||
; CHECK-SD-NEXT: mov x20, x2
|
||||
; CHECK-SD-NEXT: mov w2, #7 // =0x7
|
||||
; CHECK-SD-NEXT: extr x9, x1, x0, #60
|
||||
; CHECK-SD-NEXT: extr x8, x3, x2, #60
|
||||
; CHECK-SD-NEXT: and x10, x0, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: mov x11, #18725 // =0x4925
|
||||
; CHECK-SD-NEXT: and x9, x9, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: and x8, x8, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: movk x11, #9362, lsl #16
|
||||
; CHECK-SD-NEXT: add x9, x10, x9
|
||||
; CHECK-SD-NEXT: and x10, x2, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: movk x11, #37449, lsl #32
|
||||
; CHECK-SD-NEXT: add x8, x10, x8
|
||||
; CHECK-SD-NEXT: add x9, x9, x1, lsr #56
|
||||
; CHECK-SD-NEXT: movk x11, #18724, lsl #48
|
||||
; CHECK-SD-NEXT: add x8, x8, x3, lsr #56
|
||||
; CHECK-SD-NEXT: mov x1, xzr
|
||||
; CHECK-SD-NEXT: mov x3, xzr
|
||||
; CHECK-SD-NEXT: bl __umodti3
|
||||
; CHECK-SD-NEXT: mov x21, x0
|
||||
; CHECK-SD-NEXT: mov x22, x1
|
||||
; CHECK-SD-NEXT: mov x0, x20
|
||||
; CHECK-SD-NEXT: mov x1, x19
|
||||
; CHECK-SD-NEXT: mov w2, #7 // =0x7
|
||||
; CHECK-SD-NEXT: mov x3, xzr
|
||||
; CHECK-SD-NEXT: bl __umodti3
|
||||
; CHECK-SD-NEXT: mov x2, x0
|
||||
; CHECK-SD-NEXT: mov x3, x1
|
||||
; CHECK-SD-NEXT: mov x0, x21
|
||||
; CHECK-SD-NEXT: mov x1, x22
|
||||
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-SD-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-SD-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
|
||||
; CHECK-SD-NEXT: umulh x10, x9, x11
|
||||
; CHECK-SD-NEXT: umulh x11, x8, x11
|
||||
; CHECK-SD-NEXT: lsr x10, x10, #1
|
||||
; CHECK-SD-NEXT: lsr x11, x11, #1
|
||||
; CHECK-SD-NEXT: sub x10, x10, x10, lsl #3
|
||||
; CHECK-SD-NEXT: sub x11, x11, x11, lsl #3
|
||||
; CHECK-SD-NEXT: add x0, x9, x10
|
||||
; CHECK-SD-NEXT: add x2, x8, x11
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: uv2i128_7:
|
||||
@ -3228,34 +3242,35 @@ entry:
|
||||
define <2 x i128> @uv2i128_100(<2 x i128> %d, <2 x i128> %e) {
|
||||
; CHECK-SD-LABEL: uv2i128_100:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
|
||||
; CHECK-SD-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
|
||||
; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
|
||||
; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
|
||||
; CHECK-SD-NEXT: .cfi_offset w19, -8
|
||||
; CHECK-SD-NEXT: .cfi_offset w20, -16
|
||||
; CHECK-SD-NEXT: .cfi_offset w21, -24
|
||||
; CHECK-SD-NEXT: .cfi_offset w22, -32
|
||||
; CHECK-SD-NEXT: .cfi_offset w30, -48
|
||||
; CHECK-SD-NEXT: mov x19, x3
|
||||
; CHECK-SD-NEXT: mov x20, x2
|
||||
; CHECK-SD-NEXT: mov w2, #100 // =0x64
|
||||
; CHECK-SD-NEXT: lsr x8, x1, #2
|
||||
; CHECK-SD-NEXT: extr x9, x1, x0, #2
|
||||
; CHECK-SD-NEXT: extr x10, x3, x2, #2
|
||||
; CHECK-SD-NEXT: lsr x11, x3, #2
|
||||
; CHECK-SD-NEXT: mov w12, #25 // =0x19
|
||||
; CHECK-SD-NEXT: extr x8, x8, x9, #60
|
||||
; CHECK-SD-NEXT: and x9, x9, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: extr x11, x11, x10, #60
|
||||
; CHECK-SD-NEXT: and x8, x8, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: add x8, x9, x8
|
||||
; CHECK-SD-NEXT: and x9, x10, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: and x10, x11, #0xfffffffffffffff
|
||||
; CHECK-SD-NEXT: mov x11, #62915 // =0xf5c3
|
||||
; CHECK-SD-NEXT: add x9, x9, x10
|
||||
; CHECK-SD-NEXT: add x8, x8, x1, lsr #58
|
||||
; CHECK-SD-NEXT: movk x11, #23592, lsl #16
|
||||
; CHECK-SD-NEXT: add x9, x9, x3, lsr #58
|
||||
; CHECK-SD-NEXT: mov x1, xzr
|
||||
; CHECK-SD-NEXT: movk x11, #49807, lsl #32
|
||||
; CHECK-SD-NEXT: mov x3, xzr
|
||||
; CHECK-SD-NEXT: bl __umodti3
|
||||
; CHECK-SD-NEXT: mov x21, x0
|
||||
; CHECK-SD-NEXT: mov x22, x1
|
||||
; CHECK-SD-NEXT: mov x0, x20
|
||||
; CHECK-SD-NEXT: mov x1, x19
|
||||
; CHECK-SD-NEXT: mov w2, #100 // =0x64
|
||||
; CHECK-SD-NEXT: mov x3, xzr
|
||||
; CHECK-SD-NEXT: bl __umodti3
|
||||
; CHECK-SD-NEXT: mov x2, x0
|
||||
; CHECK-SD-NEXT: mov x3, x1
|
||||
; CHECK-SD-NEXT: mov x0, x21
|
||||
; CHECK-SD-NEXT: mov x1, x22
|
||||
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-SD-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-SD-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
|
||||
; CHECK-SD-NEXT: movk x11, #10485, lsl #48
|
||||
; CHECK-SD-NEXT: umulh x10, x8, x11
|
||||
; CHECK-SD-NEXT: umulh x11, x9, x11
|
||||
; CHECK-SD-NEXT: lsr x10, x10, #2
|
||||
; CHECK-SD-NEXT: lsr x11, x11, #2
|
||||
; CHECK-SD-NEXT: msub x8, x10, x12, x8
|
||||
; CHECK-SD-NEXT: msub x9, x11, x12, x9
|
||||
; CHECK-SD-NEXT: bfi x0, x8, #2, #62
|
||||
; CHECK-SD-NEXT: bfi x2, x9, #2, #62
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: uv2i128_100:
|
||||
|
||||
@ -89,17 +89,19 @@ define i32 @dont_fold_urem_i32_umax(i32 %x) {
|
||||
define i64 @dont_fold_urem_i64(i64 %x) {
|
||||
; CHECK-LABEL: dont_fold_urem_i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mflr 0
|
||||
; CHECK-NEXT: stwu 1, -16(1)
|
||||
; CHECK-NEXT: stw 0, 20(1)
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .cfi_offset lr, 4
|
||||
; CHECK-NEXT: li 5, 0
|
||||
; CHECK-NEXT: li 6, 98
|
||||
; CHECK-NEXT: bl __umoddi3
|
||||
; CHECK-NEXT: lwz 0, 20(1)
|
||||
; CHECK-NEXT: addi 1, 1, 16
|
||||
; CHECK-NEXT: mtlr 0
|
||||
; CHECK-NEXT: srwi 6, 4, 22
|
||||
; CHECK-NEXT: rlwinm 7, 4, 31, 11, 31
|
||||
; CHECK-NEXT: rlwimi 6, 3, 10, 11, 21
|
||||
; CHECK-NEXT: lis 5, 1337
|
||||
; CHECK-NEXT: add 6, 7, 6
|
||||
; CHECK-NEXT: srwi 3, 3, 11
|
||||
; CHECK-NEXT: ori 5, 5, 30762
|
||||
; CHECK-NEXT: add 3, 6, 3
|
||||
; CHECK-NEXT: mulhwu 5, 3, 5
|
||||
; CHECK-NEXT: mulli 5, 5, 49
|
||||
; CHECK-NEXT: sub 3, 3, 5
|
||||
; CHECK-NEXT: rlwimi 4, 3, 1, 0, 30
|
||||
; CHECK-NEXT: li 3, 0
|
||||
; CHECK-NEXT: blr
|
||||
%1 = urem i64 %x, 98
|
||||
ret i64 %1
|
||||
|
||||
@ -111,16 +111,78 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind {
|
||||
}
|
||||
|
||||
define i64 @udiv64_constant_add(i64 %a) nounwind {
|
||||
; RV32-LABEL: udiv64_constant_add:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; RV32-NEXT: li a2, 7
|
||||
; RV32-NEXT: li a3, 0
|
||||
; RV32-NEXT: call __udivdi3
|
||||
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: ret
|
||||
; RV32IM-LABEL: udiv64_constant_add:
|
||||
; RV32IM: # %bb.0:
|
||||
; RV32IM-NEXT: lui a2, 262144
|
||||
; RV32IM-NEXT: slli a3, a1, 2
|
||||
; RV32IM-NEXT: srli a4, a0, 30
|
||||
; RV32IM-NEXT: srli a5, a1, 28
|
||||
; RV32IM-NEXT: lui a6, 149797
|
||||
; RV32IM-NEXT: or a3, a4, a3
|
||||
; RV32IM-NEXT: lui a4, 449390
|
||||
; RV32IM-NEXT: addi a2, a2, -1
|
||||
; RV32IM-NEXT: and a3, a3, a2
|
||||
; RV32IM-NEXT: and a2, a0, a2
|
||||
; RV32IM-NEXT: add a2, a2, a3
|
||||
; RV32IM-NEXT: lui a3, 748983
|
||||
; RV32IM-NEXT: addi a6, a6, -1755
|
||||
; RV32IM-NEXT: addi a4, a4, -1171
|
||||
; RV32IM-NEXT: addi a3, a3, -585
|
||||
; RV32IM-NEXT: add a2, a2, a5
|
||||
; RV32IM-NEXT: mulhu a5, a2, a6
|
||||
; RV32IM-NEXT: sub a6, a2, a5
|
||||
; RV32IM-NEXT: srli a6, a6, 1
|
||||
; RV32IM-NEXT: add a5, a6, a5
|
||||
; RV32IM-NEXT: srli a5, a5, 2
|
||||
; RV32IM-NEXT: slli a6, a5, 3
|
||||
; RV32IM-NEXT: sub a5, a5, a6
|
||||
; RV32IM-NEXT: add a2, a2, a5
|
||||
; RV32IM-NEXT: sub a5, a0, a2
|
||||
; RV32IM-NEXT: sltu a0, a0, a2
|
||||
; RV32IM-NEXT: mul a2, a5, a4
|
||||
; RV32IM-NEXT: mulhu a4, a5, a3
|
||||
; RV32IM-NEXT: sub a1, a1, a0
|
||||
; RV32IM-NEXT: add a2, a4, a2
|
||||
; RV32IM-NEXT: mul a1, a1, a3
|
||||
; RV32IM-NEXT: add a1, a2, a1
|
||||
; RV32IM-NEXT: mul a0, a5, a3
|
||||
; RV32IM-NEXT: ret
|
||||
;
|
||||
; RV32IMZB-LABEL: udiv64_constant_add:
|
||||
; RV32IMZB: # %bb.0:
|
||||
; RV32IMZB-NEXT: srli a2, a0, 30
|
||||
; RV32IMZB-NEXT: lui a3, 786432
|
||||
; RV32IMZB-NEXT: slli a4, a0, 2
|
||||
; RV32IMZB-NEXT: srli a5, a1, 28
|
||||
; RV32IMZB-NEXT: lui a6, 149797
|
||||
; RV32IMZB-NEXT: sh2add a2, a1, a2
|
||||
; RV32IMZB-NEXT: andn a2, a2, a3
|
||||
; RV32IMZB-NEXT: lui a3, 449390
|
||||
; RV32IMZB-NEXT: srli a4, a4, 2
|
||||
; RV32IMZB-NEXT: add a4, a4, a5
|
||||
; RV32IMZB-NEXT: lui a5, 748983
|
||||
; RV32IMZB-NEXT: addi a6, a6, -1755
|
||||
; RV32IMZB-NEXT: addi a3, a3, -1171
|
||||
; RV32IMZB-NEXT: addi a5, a5, -585
|
||||
; RV32IMZB-NEXT: add a2, a4, a2
|
||||
; RV32IMZB-NEXT: mulhu a4, a2, a6
|
||||
; RV32IMZB-NEXT: sub a6, a2, a4
|
||||
; RV32IMZB-NEXT: srli a6, a6, 1
|
||||
; RV32IMZB-NEXT: add a4, a6, a4
|
||||
; RV32IMZB-NEXT: srli a4, a4, 2
|
||||
; RV32IMZB-NEXT: slli a6, a4, 3
|
||||
; RV32IMZB-NEXT: sub a4, a4, a6
|
||||
; RV32IMZB-NEXT: add a2, a2, a4
|
||||
; RV32IMZB-NEXT: sub a4, a0, a2
|
||||
; RV32IMZB-NEXT: sltu a0, a0, a2
|
||||
; RV32IMZB-NEXT: mul a2, a4, a3
|
||||
; RV32IMZB-NEXT: mulhu a3, a4, a5
|
||||
; RV32IMZB-NEXT: sub a1, a1, a0
|
||||
; RV32IMZB-NEXT: add a2, a3, a2
|
||||
; RV32IMZB-NEXT: mul a1, a1, a5
|
||||
; RV32IMZB-NEXT: add a1, a2, a1
|
||||
; RV32IMZB-NEXT: mul a0, a4, a5
|
||||
; RV32IMZB-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: udiv64_constant_add:
|
||||
; RV64: # %bb.0:
|
||||
|
||||
@ -117,24 +117,75 @@ define iXLen2 @test_udiv_5(iXLen2 %x) nounwind {
|
||||
define iXLen2 @test_udiv_7(iXLen2 %x) nounwind {
|
||||
; RV32-LABEL: test_udiv_7:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; RV32-NEXT: li a2, 7
|
||||
; RV32-NEXT: li a3, 0
|
||||
; RV32-NEXT: call __udivdi3
|
||||
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: lui a2, 262144
|
||||
; RV32-NEXT: slli a3, a1, 2
|
||||
; RV32-NEXT: srli a4, a0, 30
|
||||
; RV32-NEXT: srli a5, a1, 28
|
||||
; RV32-NEXT: lui a6, 149797
|
||||
; RV32-NEXT: or a3, a4, a3
|
||||
; RV32-NEXT: lui a4, 449390
|
||||
; RV32-NEXT: addi a2, a2, -1
|
||||
; RV32-NEXT: and a3, a3, a2
|
||||
; RV32-NEXT: and a2, a0, a2
|
||||
; RV32-NEXT: add a2, a2, a3
|
||||
; RV32-NEXT: lui a3, 748983
|
||||
; RV32-NEXT: addi a6, a6, -1755
|
||||
; RV32-NEXT: addi a4, a4, -1171
|
||||
; RV32-NEXT: addi a3, a3, -585
|
||||
; RV32-NEXT: add a2, a2, a5
|
||||
; RV32-NEXT: mulhu a5, a2, a6
|
||||
; RV32-NEXT: sub a6, a2, a5
|
||||
; RV32-NEXT: srli a6, a6, 1
|
||||
; RV32-NEXT: add a5, a6, a5
|
||||
; RV32-NEXT: srli a5, a5, 2
|
||||
; RV32-NEXT: slli a6, a5, 3
|
||||
; RV32-NEXT: sub a5, a5, a6
|
||||
; RV32-NEXT: add a2, a2, a5
|
||||
; RV32-NEXT: sub a5, a0, a2
|
||||
; RV32-NEXT: sltu a0, a0, a2
|
||||
; RV32-NEXT: mul a2, a5, a4
|
||||
; RV32-NEXT: mulhu a4, a5, a3
|
||||
; RV32-NEXT: sub a1, a1, a0
|
||||
; RV32-NEXT: add a2, a4, a2
|
||||
; RV32-NEXT: mul a1, a1, a3
|
||||
; RV32-NEXT: add a1, a2, a1
|
||||
; RV32-NEXT: mul a0, a5, a3
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: test_udiv_7:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi sp, sp, -16
|
||||
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64-NEXT: li a2, 7
|
||||
; RV64-NEXT: li a3, 0
|
||||
; RV64-NEXT: call __udivti3
|
||||
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; RV64-NEXT: addi sp, sp, 16
|
||||
; RV64-NEXT: li a2, -1
|
||||
; RV64-NEXT: slli a3, a1, 4
|
||||
; RV64-NEXT: srli a4, a0, 60
|
||||
; RV64-NEXT: srli a5, a1, 56
|
||||
; RV64-NEXT: lui a6, %hi(.LCPI2_0)
|
||||
; RV64-NEXT: or a3, a4, a3
|
||||
; RV64-NEXT: lui a4, 748983
|
||||
; RV64-NEXT: srli a2, a2, 4
|
||||
; RV64-NEXT: ld a6, %lo(.LCPI2_0)(a6)
|
||||
; RV64-NEXT: addi a4, a4, -585
|
||||
; RV64-NEXT: and a3, a3, a2
|
||||
; RV64-NEXT: and a2, a0, a2
|
||||
; RV64-NEXT: add a2, a2, a3
|
||||
; RV64-NEXT: slli a3, a4, 33
|
||||
; RV64-NEXT: add a3, a4, a3
|
||||
; RV64-NEXT: lui a4, %hi(.LCPI2_1)
|
||||
; RV64-NEXT: ld a4, %lo(.LCPI2_1)(a4)
|
||||
; RV64-NEXT: add a2, a2, a5
|
||||
; RV64-NEXT: mulhu a5, a2, a6
|
||||
; RV64-NEXT: srli a5, a5, 1
|
||||
; RV64-NEXT: slli a6, a5, 3
|
||||
; RV64-NEXT: sub a5, a5, a6
|
||||
; RV64-NEXT: add a2, a2, a5
|
||||
; RV64-NEXT: sub a5, a0, a2
|
||||
; RV64-NEXT: sltu a0, a0, a2
|
||||
; RV64-NEXT: mul a2, a5, a4
|
||||
; RV64-NEXT: mulhu a4, a5, a3
|
||||
; RV64-NEXT: sub a1, a1, a0
|
||||
; RV64-NEXT: add a2, a4, a2
|
||||
; RV64-NEXT: mul a1, a1, a3
|
||||
; RV64-NEXT: add a1, a2, a1
|
||||
; RV64-NEXT: mul a0, a5, a3
|
||||
; RV64-NEXT: ret
|
||||
%a = udiv iXLen2 %x, 7
|
||||
ret iXLen2 %a
|
||||
@ -143,24 +194,67 @@ define iXLen2 @test_udiv_7(iXLen2 %x) nounwind {
|
||||
define iXLen2 @test_udiv_9(iXLen2 %x) nounwind {
|
||||
; RV32-LABEL: test_udiv_9:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; RV32-NEXT: li a2, 9
|
||||
; RV32-NEXT: li a3, 0
|
||||
; RV32-NEXT: call __udivdi3
|
||||
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: lui a2, 262144
|
||||
; RV32-NEXT: slli a3, a1, 2
|
||||
; RV32-NEXT: srli a4, a0, 30
|
||||
; RV32-NEXT: srli a5, a1, 28
|
||||
; RV32-NEXT: lui a6, 233017
|
||||
; RV32-NEXT: or a3, a4, a3
|
||||
; RV32-NEXT: lui a4, 582542
|
||||
; RV32-NEXT: addi a2, a2, -1
|
||||
; RV32-NEXT: addi a6, a6, -455
|
||||
; RV32-NEXT: addi a4, a4, 910
|
||||
; RV32-NEXT: and a3, a3, a2
|
||||
; RV32-NEXT: and a2, a0, a2
|
||||
; RV32-NEXT: add a2, a2, a3
|
||||
; RV32-NEXT: add a2, a2, a5
|
||||
; RV32-NEXT: mulhu a3, a2, a6
|
||||
; RV32-NEXT: srli a3, a3, 1
|
||||
; RV32-NEXT: slli a5, a3, 3
|
||||
; RV32-NEXT: add a3, a5, a3
|
||||
; RV32-NEXT: sub a2, a2, a3
|
||||
; RV32-NEXT: sub a3, a0, a2
|
||||
; RV32-NEXT: sltu a0, a0, a2
|
||||
; RV32-NEXT: mul a2, a3, a4
|
||||
; RV32-NEXT: mulhu a4, a3, a6
|
||||
; RV32-NEXT: sub a1, a1, a0
|
||||
; RV32-NEXT: add a2, a4, a2
|
||||
; RV32-NEXT: mul a1, a1, a6
|
||||
; RV32-NEXT: add a1, a2, a1
|
||||
; RV32-NEXT: mul a0, a3, a6
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: test_udiv_9:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi sp, sp, -16
|
||||
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64-NEXT: li a2, 9
|
||||
; RV64-NEXT: li a3, 0
|
||||
; RV64-NEXT: call __udivti3
|
||||
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; RV64-NEXT: addi sp, sp, 16
|
||||
; RV64-NEXT: li a2, -1
|
||||
; RV64-NEXT: slli a3, a1, 4
|
||||
; RV64-NEXT: srli a4, a0, 60
|
||||
; RV64-NEXT: srli a5, a1, 56
|
||||
; RV64-NEXT: lui a6, %hi(.LCPI3_0)
|
||||
; RV64-NEXT: or a3, a4, a3
|
||||
; RV64-NEXT: lui a4, %hi(.LCPI3_1)
|
||||
; RV64-NEXT: srli a2, a2, 4
|
||||
; RV64-NEXT: and a3, a3, a2
|
||||
; RV64-NEXT: and a2, a0, a2
|
||||
; RV64-NEXT: add a2, a2, a3
|
||||
; RV64-NEXT: lui a3, %hi(.LCPI3_2)
|
||||
; RV64-NEXT: ld a6, %lo(.LCPI3_0)(a6)
|
||||
; RV64-NEXT: ld a4, %lo(.LCPI3_1)(a4)
|
||||
; RV64-NEXT: ld a3, %lo(.LCPI3_2)(a3)
|
||||
; RV64-NEXT: add a2, a2, a5
|
||||
; RV64-NEXT: mulhu a5, a2, a6
|
||||
; RV64-NEXT: slli a6, a5, 3
|
||||
; RV64-NEXT: add a5, a6, a5
|
||||
; RV64-NEXT: sub a2, a2, a5
|
||||
; RV64-NEXT: sub a5, a0, a2
|
||||
; RV64-NEXT: sltu a0, a0, a2
|
||||
; RV64-NEXT: mul a2, a5, a4
|
||||
; RV64-NEXT: mulhu a4, a5, a3
|
||||
; RV64-NEXT: sub a1, a1, a0
|
||||
; RV64-NEXT: add a2, a4, a2
|
||||
; RV64-NEXT: mul a1, a1, a3
|
||||
; RV64-NEXT: add a1, a2, a1
|
||||
; RV64-NEXT: mul a0, a5, a3
|
||||
; RV64-NEXT: ret
|
||||
%a = udiv iXLen2 %x, 9
|
||||
ret iXLen2 %a
|
||||
|
||||
@ -79,24 +79,49 @@ define iXLen2 @test_urem_5(iXLen2 %x) nounwind {
|
||||
define iXLen2 @test_urem_7(iXLen2 %x) nounwind {
|
||||
; RV32-LABEL: test_urem_7:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; RV32-NEXT: li a2, 7
|
||||
; RV32-NEXT: li a3, 0
|
||||
; RV32-NEXT: call __umoddi3
|
||||
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: lui a2, 262144
|
||||
; RV32-NEXT: slli a3, a1, 2
|
||||
; RV32-NEXT: srli a4, a0, 30
|
||||
; RV32-NEXT: srli a1, a1, 28
|
||||
; RV32-NEXT: or a3, a4, a3
|
||||
; RV32-NEXT: lui a4, 149797
|
||||
; RV32-NEXT: addi a2, a2, -1
|
||||
; RV32-NEXT: and a0, a0, a2
|
||||
; RV32-NEXT: and a2, a3, a2
|
||||
; RV32-NEXT: add a0, a0, a2
|
||||
; RV32-NEXT: add a0, a0, a1
|
||||
; RV32-NEXT: addi a1, a4, -1755
|
||||
; RV32-NEXT: mulhu a1, a0, a1
|
||||
; RV32-NEXT: sub a2, a0, a1
|
||||
; RV32-NEXT: srli a2, a2, 1
|
||||
; RV32-NEXT: add a1, a2, a1
|
||||
; RV32-NEXT: srli a1, a1, 2
|
||||
; RV32-NEXT: slli a2, a1, 3
|
||||
; RV32-NEXT: sub a1, a1, a2
|
||||
; RV32-NEXT: add a0, a0, a1
|
||||
; RV32-NEXT: li a1, 0
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: test_urem_7:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi sp, sp, -16
|
||||
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64-NEXT: li a2, 7
|
||||
; RV64-NEXT: li a3, 0
|
||||
; RV64-NEXT: call __umodti3
|
||||
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; RV64-NEXT: addi sp, sp, 16
|
||||
; RV64-NEXT: li a2, -1
|
||||
; RV64-NEXT: slli a3, a1, 4
|
||||
; RV64-NEXT: srli a4, a0, 60
|
||||
; RV64-NEXT: or a3, a4, a3
|
||||
; RV64-NEXT: lui a4, %hi(.LCPI2_0)
|
||||
; RV64-NEXT: srli a2, a2, 4
|
||||
; RV64-NEXT: ld a4, %lo(.LCPI2_0)(a4)
|
||||
; RV64-NEXT: and a0, a0, a2
|
||||
; RV64-NEXT: and a2, a3, a2
|
||||
; RV64-NEXT: add a0, a0, a2
|
||||
; RV64-NEXT: srli a1, a1, 56
|
||||
; RV64-NEXT: add a0, a0, a1
|
||||
; RV64-NEXT: mulhu a1, a0, a4
|
||||
; RV64-NEXT: srli a1, a1, 1
|
||||
; RV64-NEXT: slli a2, a1, 3
|
||||
; RV64-NEXT: sub a1, a1, a2
|
||||
; RV64-NEXT: add a0, a0, a1
|
||||
; RV64-NEXT: li a1, 0
|
||||
; RV64-NEXT: ret
|
||||
%a = urem iXLen2 %x, 7
|
||||
ret iXLen2 %a
|
||||
@ -105,24 +130,45 @@ define iXLen2 @test_urem_7(iXLen2 %x) nounwind {
|
||||
define iXLen2 @test_urem_9(iXLen2 %x) nounwind {
|
||||
; RV32-LABEL: test_urem_9:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi sp, sp, -16
|
||||
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; RV32-NEXT: li a2, 9
|
||||
; RV32-NEXT: li a3, 0
|
||||
; RV32-NEXT: call __umoddi3
|
||||
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
||||
; RV32-NEXT: addi sp, sp, 16
|
||||
; RV32-NEXT: lui a2, 262144
|
||||
; RV32-NEXT: slli a3, a1, 2
|
||||
; RV32-NEXT: srli a4, a0, 30
|
||||
; RV32-NEXT: srli a1, a1, 28
|
||||
; RV32-NEXT: or a3, a4, a3
|
||||
; RV32-NEXT: lui a4, 233017
|
||||
; RV32-NEXT: addi a2, a2, -1
|
||||
; RV32-NEXT: and a0, a0, a2
|
||||
; RV32-NEXT: and a2, a3, a2
|
||||
; RV32-NEXT: add a0, a0, a2
|
||||
; RV32-NEXT: add a0, a0, a1
|
||||
; RV32-NEXT: addi a1, a4, -455
|
||||
; RV32-NEXT: mulhu a1, a0, a1
|
||||
; RV32-NEXT: srli a1, a1, 1
|
||||
; RV32-NEXT: slli a2, a1, 3
|
||||
; RV32-NEXT: add a1, a2, a1
|
||||
; RV32-NEXT: sub a0, a0, a1
|
||||
; RV32-NEXT: li a1, 0
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: test_urem_9:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi sp, sp, -16
|
||||
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64-NEXT: li a2, 9
|
||||
; RV64-NEXT: li a3, 0
|
||||
; RV64-NEXT: call __umodti3
|
||||
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
||||
; RV64-NEXT: addi sp, sp, 16
|
||||
; RV64-NEXT: li a2, -1
|
||||
; RV64-NEXT: slli a3, a1, 4
|
||||
; RV64-NEXT: srli a4, a0, 60
|
||||
; RV64-NEXT: or a3, a4, a3
|
||||
; RV64-NEXT: lui a4, %hi(.LCPI3_0)
|
||||
; RV64-NEXT: srli a2, a2, 4
|
||||
; RV64-NEXT: ld a4, %lo(.LCPI3_0)(a4)
|
||||
; RV64-NEXT: and a0, a0, a2
|
||||
; RV64-NEXT: and a2, a3, a2
|
||||
; RV64-NEXT: add a0, a0, a2
|
||||
; RV64-NEXT: srli a1, a1, 56
|
||||
; RV64-NEXT: add a0, a0, a1
|
||||
; RV64-NEXT: mulhu a1, a0, a4
|
||||
; RV64-NEXT: slli a2, a1, 3
|
||||
; RV64-NEXT: add a1, a2, a1
|
||||
; RV64-NEXT: sub a0, a0, a1
|
||||
; RV64-NEXT: li a1, 0
|
||||
; RV64-NEXT: ret
|
||||
%a = urem iXLen2 %x, 9
|
||||
ret iXLen2 %a
|
||||
|
||||
@ -229,13 +229,28 @@ define i64 @dont_fold_urem_i64(i64 %x) nounwind {
|
||||
;
|
||||
; RV32IM-LABEL: dont_fold_urem_i64:
|
||||
; RV32IM: # %bb.0:
|
||||
; RV32IM-NEXT: addi sp, sp, -16
|
||||
; RV32IM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; RV32IM-NEXT: li a2, 98
|
||||
; RV32IM-NEXT: li a3, 0
|
||||
; RV32IM-NEXT: call __umoddi3
|
||||
; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: addi sp, sp, 16
|
||||
; RV32IM-NEXT: slli a2, a1, 31
|
||||
; RV32IM-NEXT: srli a3, a0, 1
|
||||
; RV32IM-NEXT: andi a4, a1, 2046
|
||||
; RV32IM-NEXT: srli a1, a1, 11
|
||||
; RV32IM-NEXT: or a2, a3, a2
|
||||
; RV32IM-NEXT: slli a4, a4, 10
|
||||
; RV32IM-NEXT: srli a3, a2, 21
|
||||
; RV32IM-NEXT: or a3, a3, a4
|
||||
; RV32IM-NEXT: lui a4, 21400
|
||||
; RV32IM-NEXT: slli a2, a2, 11
|
||||
; RV32IM-NEXT: srli a2, a2, 11
|
||||
; RV32IM-NEXT: add a2, a2, a3
|
||||
; RV32IM-NEXT: li a3, 49
|
||||
; RV32IM-NEXT: addi a4, a4, -2006
|
||||
; RV32IM-NEXT: add a1, a2, a1
|
||||
; RV32IM-NEXT: mulhu a2, a1, a4
|
||||
; RV32IM-NEXT: mul a2, a2, a3
|
||||
; RV32IM-NEXT: sub a1, a1, a2
|
||||
; RV32IM-NEXT: slli a1, a1, 1
|
||||
; RV32IM-NEXT: andi a0, a0, 1
|
||||
; RV32IM-NEXT: or a0, a1, a0
|
||||
; RV32IM-NEXT: li a1, 0
|
||||
; RV32IM-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: dont_fold_urem_i64:
|
||||
|
||||
@ -862,51 +862,58 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
|
||||
; RV32IM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
|
||||
; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
|
||||
; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
|
||||
; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
|
||||
; RV32IM-NEXT: lw s1, 16(a1)
|
||||
; RV32IM-NEXT: lw s2, 20(a1)
|
||||
; RV32IM-NEXT: lw s3, 24(a1)
|
||||
; RV32IM-NEXT: lw s4, 28(a1)
|
||||
; RV32IM-NEXT: lw a3, 0(a1)
|
||||
; RV32IM-NEXT: lw a4, 4(a1)
|
||||
; RV32IM-NEXT: lw s5, 8(a1)
|
||||
; RV32IM-NEXT: lw s6, 12(a1)
|
||||
; RV32IM-NEXT: mv s0, a0
|
||||
; RV32IM-NEXT: lw a2, 16(a1)
|
||||
; RV32IM-NEXT: lw a4, 20(a1)
|
||||
; RV32IM-NEXT: lw s1, 24(a1)
|
||||
; RV32IM-NEXT: lw s2, 28(a1)
|
||||
; RV32IM-NEXT: lw a0, 0(a1)
|
||||
; RV32IM-NEXT: lw a3, 4(a1)
|
||||
; RV32IM-NEXT: lw s3, 8(a1)
|
||||
; RV32IM-NEXT: lw s4, 12(a1)
|
||||
; RV32IM-NEXT: lui a1, 1024
|
||||
; RV32IM-NEXT: slli a5, a4, 10
|
||||
; RV32IM-NEXT: srli a6, a2, 22
|
||||
; RV32IM-NEXT: or a5, a6, a5
|
||||
; RV32IM-NEXT: lui a6, 45590
|
||||
; RV32IM-NEXT: addi a1, a1, -1
|
||||
; RV32IM-NEXT: addi a6, a6, 1069
|
||||
; RV32IM-NEXT: and a2, a2, a1
|
||||
; RV32IM-NEXT: srli a4, a4, 12
|
||||
; RV32IM-NEXT: add a2, a2, a4
|
||||
; RV32IM-NEXT: and a1, a5, a1
|
||||
; RV32IM-NEXT: add a1, a2, a1
|
||||
; RV32IM-NEXT: mulhu a2, a1, a6
|
||||
; RV32IM-NEXT: li a4, 23
|
||||
; RV32IM-NEXT: mul a2, a2, a4
|
||||
; RV32IM-NEXT: sub s7, a1, a2
|
||||
; RV32IM-NEXT: li a2, 1
|
||||
; RV32IM-NEXT: mv a0, a3
|
||||
; RV32IM-NEXT: mv a1, a4
|
||||
; RV32IM-NEXT: li a3, 0
|
||||
; RV32IM-NEXT: call __umoddi3
|
||||
; RV32IM-NEXT: mv s7, a0
|
||||
; RV32IM-NEXT: mv s8, a1
|
||||
; RV32IM-NEXT: li a2, 654
|
||||
; RV32IM-NEXT: mv a0, s5
|
||||
; RV32IM-NEXT: mv a1, s6
|
||||
; RV32IM-NEXT: mv a1, a3
|
||||
; RV32IM-NEXT: li a3, 0
|
||||
; RV32IM-NEXT: call __umoddi3
|
||||
; RV32IM-NEXT: mv s5, a0
|
||||
; RV32IM-NEXT: mv s6, a1
|
||||
; RV32IM-NEXT: li a2, 23
|
||||
; RV32IM-NEXT: mv a0, s1
|
||||
; RV32IM-NEXT: mv a1, s2
|
||||
; RV32IM-NEXT: li a3, 0
|
||||
; RV32IM-NEXT: call __umoddi3
|
||||
; RV32IM-NEXT: mv s1, a0
|
||||
; RV32IM-NEXT: mv s2, a1
|
||||
; RV32IM-NEXT: lui a2, 1
|
||||
; RV32IM-NEXT: addi a2, a2, 1327
|
||||
; RV32IM-NEXT: li a2, 654
|
||||
; RV32IM-NEXT: mv a0, s3
|
||||
; RV32IM-NEXT: mv a1, s4
|
||||
; RV32IM-NEXT: li a3, 0
|
||||
; RV32IM-NEXT: call __umoddi3
|
||||
; RV32IM-NEXT: sw s1, 16(s0)
|
||||
; RV32IM-NEXT: sw s2, 20(s0)
|
||||
; RV32IM-NEXT: mv s3, a0
|
||||
; RV32IM-NEXT: mv s4, a1
|
||||
; RV32IM-NEXT: lui a2, 1
|
||||
; RV32IM-NEXT: addi a2, a2, 1327
|
||||
; RV32IM-NEXT: mv a0, s1
|
||||
; RV32IM-NEXT: mv a1, s2
|
||||
; RV32IM-NEXT: li a3, 0
|
||||
; RV32IM-NEXT: call __umoddi3
|
||||
; RV32IM-NEXT: sw s7, 16(s0)
|
||||
; RV32IM-NEXT: sw zero, 20(s0)
|
||||
; RV32IM-NEXT: sw a0, 24(s0)
|
||||
; RV32IM-NEXT: sw a1, 28(s0)
|
||||
; RV32IM-NEXT: sw s7, 0(s0)
|
||||
; RV32IM-NEXT: sw s8, 4(s0)
|
||||
; RV32IM-NEXT: sw s5, 8(s0)
|
||||
; RV32IM-NEXT: sw s6, 12(s0)
|
||||
; RV32IM-NEXT: sw s5, 0(s0)
|
||||
; RV32IM-NEXT: sw s6, 4(s0)
|
||||
; RV32IM-NEXT: sw s3, 8(s0)
|
||||
; RV32IM-NEXT: sw s4, 12(s0)
|
||||
; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
|
||||
@ -916,7 +923,6 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
|
||||
; RV32IM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
|
||||
; RV32IM-NEXT: addi sp, sp, 48
|
||||
; RV32IM-NEXT: ret
|
||||
;
|
||||
|
||||
@ -294,19 +294,48 @@ entry:
|
||||
define i64 @PR23590(i64 %x) nounwind {
|
||||
; X86-LABEL: PR23590:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: subl $12, %esp
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $12345 # imm = 0x3039
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: calll __umoddi3
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: pushl $7
|
||||
; X86-NEXT: pushl %edx
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: calll __udivdi3
|
||||
; X86-NEXT: addl $28, %esp
|
||||
; X86-NEXT: movl %eax, %esi
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF
|
||||
; X86-NEXT: movl %esi, %edx
|
||||
; X86-NEXT: shrdl $30, %ecx, %edx
|
||||
; X86-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF
|
||||
; X86-NEXT: movl %ecx, %edi
|
||||
; X86-NEXT: shrl $28, %edi
|
||||
; X86-NEXT: addl %eax, %edi
|
||||
; X86-NEXT: addl %edx, %edi
|
||||
; X86-NEXT: movl $613566757, %edx # imm = 0x24924925
|
||||
; X86-NEXT: movl %edi, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: movl %edi, %eax
|
||||
; X86-NEXT: subl %edx, %eax
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: addl %edx, %eax
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (,%eax,8), %edx
|
||||
; X86-NEXT: subl %edx, %eax
|
||||
; X86-NEXT: addl %edi, %eax
|
||||
; X86-NEXT: subl %eax, %esi
|
||||
; X86-NEXT: sbbl $0, %ecx
|
||||
; X86-NEXT: movl $-1227133513, %edx # imm = 0xB6DB6DB7
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: imull $1840700269, %esi, %esi # imm = 0x6DB6DB6D
|
||||
; X86-NEXT: addl %esi, %edx
|
||||
; X86-NEXT: imull $-1227133513, %ecx, %ecx # imm = 0xB6DB6DB7
|
||||
; X86-NEXT: addl %ecx, %edx
|
||||
; X86-NEXT: addl $4, %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-FAST-LABEL: PR23590:
|
||||
|
||||
@ -67,25 +67,42 @@ define i64 @div128(i128 %x) nounwind {
|
||||
define i64 @umod128(i128 %x) nounwind {
|
||||
; X86-64-LABEL: umod128:
|
||||
; X86-64: # %bb.0:
|
||||
; X86-64-NEXT: pushq %rax
|
||||
; X86-64-NEXT: movl $11, %edx
|
||||
; X86-64-NEXT: xorl %ecx, %ecx
|
||||
; X86-64-NEXT: callq __umodti3@PLT
|
||||
; X86-64-NEXT: popq %rcx
|
||||
; X86-64-NEXT: movabsq $1152921504606846975, %rcx # imm = 0xFFFFFFFFFFFFFFF
|
||||
; X86-64-NEXT: movq %rdi, %rax
|
||||
; X86-64-NEXT: andq %rcx, %rax
|
||||
; X86-64-NEXT: shrdq $60, %rsi, %rdi
|
||||
; X86-64-NEXT: andq %rdi, %rcx
|
||||
; X86-64-NEXT: addq %rax, %rcx
|
||||
; X86-64-NEXT: shrq $56, %rsi
|
||||
; X86-64-NEXT: addq %rsi, %rcx
|
||||
; X86-64-NEXT: movabsq $3353953467947191203, %rdx # imm = 0x2E8BA2E8BA2E8BA3
|
||||
; X86-64-NEXT: movq %rcx, %rax
|
||||
; X86-64-NEXT: mulq %rdx
|
||||
; X86-64-NEXT: shrq %rdx
|
||||
; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax
|
||||
; X86-64-NEXT: leaq (%rdx,%rax,2), %rax
|
||||
; X86-64-NEXT: subq %rax, %rcx
|
||||
; X86-64-NEXT: movq %rcx, %rax
|
||||
; X86-64-NEXT: retq
|
||||
;
|
||||
; WIN64-LABEL: umod128:
|
||||
; WIN64: # %bb.0:
|
||||
; WIN64-NEXT: subq $72, %rsp
|
||||
; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
|
||||
; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
|
||||
; WIN64-NEXT: movq $11, {{[0-9]+}}(%rsp)
|
||||
; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
|
||||
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
||||
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
|
||||
; WIN64-NEXT: callq __umodti3
|
||||
; WIN64-NEXT: movq %xmm0, %rax
|
||||
; WIN64-NEXT: addq $72, %rsp
|
||||
; WIN64-NEXT: movabsq $1152921504606846975, %r8 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; WIN64-NEXT: movq %rcx, %rax
|
||||
; WIN64-NEXT: andq %r8, %rax
|
||||
; WIN64-NEXT: shrdq $60, %rdx, %rcx
|
||||
; WIN64-NEXT: andq %rcx, %r8
|
||||
; WIN64-NEXT: addq %rax, %r8
|
||||
; WIN64-NEXT: shrq $56, %rdx
|
||||
; WIN64-NEXT: addq %rdx, %r8
|
||||
; WIN64-NEXT: movabsq $3353953467947191203, %rcx # imm = 0x2E8BA2E8BA2E8BA3
|
||||
; WIN64-NEXT: movq %r8, %rax
|
||||
; WIN64-NEXT: mulq %rcx
|
||||
; WIN64-NEXT: shrq %rdx
|
||||
; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax
|
||||
; WIN64-NEXT: leaq (%rdx,%rax,2), %rax
|
||||
; WIN64-NEXT: subq %rax, %r8
|
||||
; WIN64-NEXT: movq %r8, %rax
|
||||
; WIN64-NEXT: retq
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -940,3 +940,477 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
|
||||
%res = urem <16 x i8> %a, <i8 7, i8 8, i8 9, i8 10,i8 11, i8 12, i8 13, i8 14, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9,i8 9, i8 7>
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <2 x i128> @v2i128_div_by_7(<2 x i128> %x) nounwind {
|
||||
; SSE-LABEL: v2i128_div_by_7:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: pushq %r15
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: movq %rcx, %r9
|
||||
; SSE-NEXT: movq %rdx, %rcx
|
||||
; SSE-NEXT: movabsq $1152921504606846975, %r14 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: movq %rsi, %rax
|
||||
; SSE-NEXT: andq %r14, %rax
|
||||
; SSE-NEXT: movq %rsi, %rdx
|
||||
; SSE-NEXT: shrdq $60, %rcx, %rdx
|
||||
; SSE-NEXT: andq %r14, %rdx
|
||||
; SSE-NEXT: addq %rax, %rdx
|
||||
; SSE-NEXT: movq %rcx, %r10
|
||||
; SSE-NEXT: shrq $56, %r10
|
||||
; SSE-NEXT: addq %rdx, %r10
|
||||
; SSE-NEXT: movabsq $5270498306774157605, %r15 # imm = 0x4924924924924925
|
||||
; SSE-NEXT: movq %r10, %rax
|
||||
; SSE-NEXT: mulq %r15
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leaq (,%rdx,8), %rax
|
||||
; SSE-NEXT: subq %rax, %rdx
|
||||
; SSE-NEXT: addq %r10, %rdx
|
||||
; SSE-NEXT: subq %rdx, %rsi
|
||||
; SSE-NEXT: sbbq $0, %rcx
|
||||
; SSE-NEXT: movabsq $-5270498306774157605, %r11 # imm = 0xB6DB6DB6DB6DB6DB
|
||||
; SSE-NEXT: movq %rsi, %r10
|
||||
; SSE-NEXT: imulq %r11, %r10
|
||||
; SSE-NEXT: movabsq $7905747460161236407, %rbx # imm = 0x6DB6DB6DB6DB6DB7
|
||||
; SSE-NEXT: movq %rsi, %rax
|
||||
; SSE-NEXT: mulq %rbx
|
||||
; SSE-NEXT: movq %rax, %rsi
|
||||
; SSE-NEXT: addq %r10, %rdx
|
||||
; SSE-NEXT: imulq %rbx, %rcx
|
||||
; SSE-NEXT: addq %rdx, %rcx
|
||||
; SSE-NEXT: movq %r9, %rax
|
||||
; SSE-NEXT: andq %r14, %rax
|
||||
; SSE-NEXT: movq %r9, %rdx
|
||||
; SSE-NEXT: shrdq $60, %r8, %rdx
|
||||
; SSE-NEXT: andq %r14, %rdx
|
||||
; SSE-NEXT: addq %rax, %rdx
|
||||
; SSE-NEXT: movq %r8, %r10
|
||||
; SSE-NEXT: shrq $56, %r10
|
||||
; SSE-NEXT: addq %rdx, %r10
|
||||
; SSE-NEXT: movq %r10, %rax
|
||||
; SSE-NEXT: mulq %r15
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leaq (,%rdx,8), %rax
|
||||
; SSE-NEXT: subq %rax, %rdx
|
||||
; SSE-NEXT: addq %r10, %rdx
|
||||
; SSE-NEXT: subq %rdx, %r9
|
||||
; SSE-NEXT: sbbq $0, %r8
|
||||
; SSE-NEXT: imulq %r9, %r11
|
||||
; SSE-NEXT: movq %r9, %rax
|
||||
; SSE-NEXT: mulq %rbx
|
||||
; SSE-NEXT: addq %r11, %rdx
|
||||
; SSE-NEXT: imulq %rbx, %r8
|
||||
; SSE-NEXT: addq %rdx, %r8
|
||||
; SSE-NEXT: movq %rax, 16(%rdi)
|
||||
; SSE-NEXT: movq %rsi, (%rdi)
|
||||
; SSE-NEXT: movq %r8, 24(%rdi)
|
||||
; SSE-NEXT: movq %rcx, 8(%rdi)
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: popq %rbx
|
||||
; SSE-NEXT: popq %r14
|
||||
; SSE-NEXT: popq %r15
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v2i128_div_by_7:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: pushq %r15
|
||||
; AVX-NEXT: pushq %r14
|
||||
; AVX-NEXT: pushq %rbx
|
||||
; AVX-NEXT: movq %rcx, %r9
|
||||
; AVX-NEXT: movq %rdx, %rcx
|
||||
; AVX-NEXT: movabsq $1152921504606846975, %r14 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: movq %rsi, %rax
|
||||
; AVX-NEXT: andq %r14, %rax
|
||||
; AVX-NEXT: movq %rsi, %rdx
|
||||
; AVX-NEXT: shrdq $60, %rcx, %rdx
|
||||
; AVX-NEXT: andq %r14, %rdx
|
||||
; AVX-NEXT: addq %rax, %rdx
|
||||
; AVX-NEXT: movq %rcx, %r10
|
||||
; AVX-NEXT: shrq $56, %r10
|
||||
; AVX-NEXT: addq %rdx, %r10
|
||||
; AVX-NEXT: movabsq $5270498306774157605, %r15 # imm = 0x4924924924924925
|
||||
; AVX-NEXT: movq %r10, %rax
|
||||
; AVX-NEXT: mulq %r15
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leaq (,%rdx,8), %rax
|
||||
; AVX-NEXT: subq %rax, %rdx
|
||||
; AVX-NEXT: addq %r10, %rdx
|
||||
; AVX-NEXT: subq %rdx, %rsi
|
||||
; AVX-NEXT: sbbq $0, %rcx
|
||||
; AVX-NEXT: movabsq $-5270498306774157605, %r11 # imm = 0xB6DB6DB6DB6DB6DB
|
||||
; AVX-NEXT: movq %rsi, %r10
|
||||
; AVX-NEXT: imulq %r11, %r10
|
||||
; AVX-NEXT: movabsq $7905747460161236407, %rbx # imm = 0x6DB6DB6DB6DB6DB7
|
||||
; AVX-NEXT: movq %rsi, %rax
|
||||
; AVX-NEXT: mulq %rbx
|
||||
; AVX-NEXT: movq %rax, %rsi
|
||||
; AVX-NEXT: addq %r10, %rdx
|
||||
; AVX-NEXT: imulq %rbx, %rcx
|
||||
; AVX-NEXT: addq %rdx, %rcx
|
||||
; AVX-NEXT: movq %r9, %rax
|
||||
; AVX-NEXT: andq %r14, %rax
|
||||
; AVX-NEXT: movq %r9, %rdx
|
||||
; AVX-NEXT: shrdq $60, %r8, %rdx
|
||||
; AVX-NEXT: andq %r14, %rdx
|
||||
; AVX-NEXT: addq %rax, %rdx
|
||||
; AVX-NEXT: movq %r8, %r10
|
||||
; AVX-NEXT: shrq $56, %r10
|
||||
; AVX-NEXT: addq %rdx, %r10
|
||||
; AVX-NEXT: movq %r10, %rax
|
||||
; AVX-NEXT: mulq %r15
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leaq (,%rdx,8), %rax
|
||||
; AVX-NEXT: subq %rax, %rdx
|
||||
; AVX-NEXT: addq %r10, %rdx
|
||||
; AVX-NEXT: subq %rdx, %r9
|
||||
; AVX-NEXT: sbbq $0, %r8
|
||||
; AVX-NEXT: imulq %r9, %r11
|
||||
; AVX-NEXT: movq %r9, %rax
|
||||
; AVX-NEXT: mulq %rbx
|
||||
; AVX-NEXT: addq %r11, %rdx
|
||||
; AVX-NEXT: imulq %rbx, %r8
|
||||
; AVX-NEXT: addq %rdx, %r8
|
||||
; AVX-NEXT: movq %rax, 16(%rdi)
|
||||
; AVX-NEXT: movq %rsi, (%rdi)
|
||||
; AVX-NEXT: movq %r8, 24(%rdi)
|
||||
; AVX-NEXT: movq %rcx, 8(%rdi)
|
||||
; AVX-NEXT: movq %rdi, %rax
|
||||
; AVX-NEXT: popq %rbx
|
||||
; AVX-NEXT: popq %r14
|
||||
; AVX-NEXT: popq %r15
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%div = udiv <2 x i128> %x, <i128 7, i128 7>
|
||||
ret <2 x i128> %div
|
||||
}
|
||||
|
||||
define <2 x i128> @v2i128_div_by_14(<2 x i128> %x) nounwind {
|
||||
; SSE-LABEL: v2i128_div_by_14:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: pushq %r15
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: movq %rcx, %r9
|
||||
; SSE-NEXT: movq %rdx, %rcx
|
||||
; SSE-NEXT: movq %rsi, %rax
|
||||
; SSE-NEXT: shrdq $1, %rdx, %rax
|
||||
; SSE-NEXT: movabsq $1152921504606846975, %r11 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: shldq $4, %rax, %rdx
|
||||
; SSE-NEXT: andq %r11, %rax
|
||||
; SSE-NEXT: andq %r11, %rdx
|
||||
; SSE-NEXT: movq %rcx, %r10
|
||||
; SSE-NEXT: shrq $57, %r10
|
||||
; SSE-NEXT: addq %rax, %r10
|
||||
; SSE-NEXT: addq %rdx, %r10
|
||||
; SSE-NEXT: movabsq $5270498306774157605, %r15 # imm = 0x4924924924924925
|
||||
; SSE-NEXT: movq %r10, %rax
|
||||
; SSE-NEXT: mulq %r15
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leaq (,%rdx,8), %rax
|
||||
; SSE-NEXT: subq %rax, %rdx
|
||||
; SSE-NEXT: addq %r10, %rdx
|
||||
; SSE-NEXT: subq %rdx, %rsi
|
||||
; SSE-NEXT: sbbq $0, %rcx
|
||||
; SSE-NEXT: movabsq $-5270498306774157605, %rbx # imm = 0xB6DB6DB6DB6DB6DB
|
||||
; SSE-NEXT: movq %rsi, %r10
|
||||
; SSE-NEXT: imulq %rbx, %r10
|
||||
; SSE-NEXT: movabsq $7905747460161236407, %r14 # imm = 0x6DB6DB6DB6DB6DB7
|
||||
; SSE-NEXT: movq %rsi, %rax
|
||||
; SSE-NEXT: mulq %r14
|
||||
; SSE-NEXT: movq %rax, %rsi
|
||||
; SSE-NEXT: addq %r10, %rdx
|
||||
; SSE-NEXT: imulq %r14, %rcx
|
||||
; SSE-NEXT: addq %rdx, %rcx
|
||||
; SSE-NEXT: movq %r9, %rax
|
||||
; SSE-NEXT: shrdq $1, %r8, %rax
|
||||
; SSE-NEXT: movq %r8, %rdx
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: shldq $4, %rax, %rdx
|
||||
; SSE-NEXT: andq %r11, %rax
|
||||
; SSE-NEXT: andq %r11, %rdx
|
||||
; SSE-NEXT: movq %r8, %r10
|
||||
; SSE-NEXT: shrq $57, %r10
|
||||
; SSE-NEXT: addq %rax, %r10
|
||||
; SSE-NEXT: addq %rdx, %r10
|
||||
; SSE-NEXT: movq %r10, %rax
|
||||
; SSE-NEXT: mulq %r15
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leaq (,%rdx,8), %rax
|
||||
; SSE-NEXT: subq %rax, %rdx
|
||||
; SSE-NEXT: addq %r10, %rdx
|
||||
; SSE-NEXT: subq %rdx, %r9
|
||||
; SSE-NEXT: sbbq $0, %r8
|
||||
; SSE-NEXT: imulq %r9, %rbx
|
||||
; SSE-NEXT: movq %r9, %rax
|
||||
; SSE-NEXT: mulq %r14
|
||||
; SSE-NEXT: addq %rbx, %rdx
|
||||
; SSE-NEXT: imulq %r14, %r8
|
||||
; SSE-NEXT: addq %rdx, %r8
|
||||
; SSE-NEXT: movq %rax, 16(%rdi)
|
||||
; SSE-NEXT: movq %rsi, (%rdi)
|
||||
; SSE-NEXT: movq %r8, 24(%rdi)
|
||||
; SSE-NEXT: movq %rcx, 8(%rdi)
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: popq %rbx
|
||||
; SSE-NEXT: popq %r14
|
||||
; SSE-NEXT: popq %r15
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v2i128_div_by_14:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: pushq %r15
|
||||
; AVX-NEXT: pushq %r14
|
||||
; AVX-NEXT: pushq %rbx
|
||||
; AVX-NEXT: movq %rcx, %r9
|
||||
; AVX-NEXT: movq %rdx, %rcx
|
||||
; AVX-NEXT: movq %rsi, %rax
|
||||
; AVX-NEXT: shrdq $1, %rdx, %rax
|
||||
; AVX-NEXT: movabsq $1152921504606846975, %r11 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: shldq $4, %rax, %rdx
|
||||
; AVX-NEXT: andq %r11, %rax
|
||||
; AVX-NEXT: andq %r11, %rdx
|
||||
; AVX-NEXT: movq %rcx, %r10
|
||||
; AVX-NEXT: shrq $57, %r10
|
||||
; AVX-NEXT: addq %rax, %r10
|
||||
; AVX-NEXT: addq %rdx, %r10
|
||||
; AVX-NEXT: movabsq $5270498306774157605, %r15 # imm = 0x4924924924924925
|
||||
; AVX-NEXT: movq %r10, %rax
|
||||
; AVX-NEXT: mulq %r15
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leaq (,%rdx,8), %rax
|
||||
; AVX-NEXT: subq %rax, %rdx
|
||||
; AVX-NEXT: addq %r10, %rdx
|
||||
; AVX-NEXT: subq %rdx, %rsi
|
||||
; AVX-NEXT: sbbq $0, %rcx
|
||||
; AVX-NEXT: movabsq $-5270498306774157605, %rbx # imm = 0xB6DB6DB6DB6DB6DB
|
||||
; AVX-NEXT: movq %rsi, %r10
|
||||
; AVX-NEXT: imulq %rbx, %r10
|
||||
; AVX-NEXT: movabsq $7905747460161236407, %r14 # imm = 0x6DB6DB6DB6DB6DB7
|
||||
; AVX-NEXT: movq %rsi, %rax
|
||||
; AVX-NEXT: mulq %r14
|
||||
; AVX-NEXT: movq %rax, %rsi
|
||||
; AVX-NEXT: addq %r10, %rdx
|
||||
; AVX-NEXT: imulq %r14, %rcx
|
||||
; AVX-NEXT: addq %rdx, %rcx
|
||||
; AVX-NEXT: movq %r9, %rax
|
||||
; AVX-NEXT: shrdq $1, %r8, %rax
|
||||
; AVX-NEXT: movq %r8, %rdx
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: shldq $4, %rax, %rdx
|
||||
; AVX-NEXT: andq %r11, %rax
|
||||
; AVX-NEXT: andq %r11, %rdx
|
||||
; AVX-NEXT: movq %r8, %r10
|
||||
; AVX-NEXT: shrq $57, %r10
|
||||
; AVX-NEXT: addq %rax, %r10
|
||||
; AVX-NEXT: addq %rdx, %r10
|
||||
; AVX-NEXT: movq %r10, %rax
|
||||
; AVX-NEXT: mulq %r15
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leaq (,%rdx,8), %rax
|
||||
; AVX-NEXT: subq %rax, %rdx
|
||||
; AVX-NEXT: addq %r10, %rdx
|
||||
; AVX-NEXT: subq %rdx, %r9
|
||||
; AVX-NEXT: sbbq $0, %r8
|
||||
; AVX-NEXT: imulq %r9, %rbx
|
||||
; AVX-NEXT: movq %r9, %rax
|
||||
; AVX-NEXT: mulq %r14
|
||||
; AVX-NEXT: addq %rbx, %rdx
|
||||
; AVX-NEXT: imulq %r14, %r8
|
||||
; AVX-NEXT: addq %rdx, %r8
|
||||
; AVX-NEXT: movq %rax, 16(%rdi)
|
||||
; AVX-NEXT: movq %rsi, (%rdi)
|
||||
; AVX-NEXT: movq %r8, 24(%rdi)
|
||||
; AVX-NEXT: movq %rcx, 8(%rdi)
|
||||
; AVX-NEXT: movq %rdi, %rax
|
||||
; AVX-NEXT: popq %rbx
|
||||
; AVX-NEXT: popq %r14
|
||||
; AVX-NEXT: popq %r15
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%div = udiv <2 x i128> %x, <i128 14, i128 14>
|
||||
ret <2 x i128> %div
|
||||
}
|
||||
|
||||
define <2 x i128> @v2i128_rem_by_7(<2 x i128> %x) nounwind {
|
||||
; SSE-LABEL: v2i128_rem_by_7:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movq %rdx, %r9
|
||||
; SSE-NEXT: movabsq $1152921504606846975, %r10 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: movq %rsi, %rax
|
||||
; SSE-NEXT: andq %r10, %rax
|
||||
; SSE-NEXT: shrdq $60, %rdx, %rsi
|
||||
; SSE-NEXT: andq %r10, %rsi
|
||||
; SSE-NEXT: addq %rax, %rsi
|
||||
; SSE-NEXT: shrq $56, %r9
|
||||
; SSE-NEXT: addq %rsi, %r9
|
||||
; SSE-NEXT: movabsq $5270498306774157605, %r11 # imm = 0x4924924924924925
|
||||
; SSE-NEXT: movq %r9, %rax
|
||||
; SSE-NEXT: mulq %r11
|
||||
; SSE-NEXT: movq %rdx, %rsi
|
||||
; SSE-NEXT: shrq %rsi
|
||||
; SSE-NEXT: leaq (,%rsi,8), %rax
|
||||
; SSE-NEXT: subq %rax, %rsi
|
||||
; SSE-NEXT: addq %r9, %rsi
|
||||
; SSE-NEXT: movq %rcx, %rax
|
||||
; SSE-NEXT: andq %r10, %rax
|
||||
; SSE-NEXT: shrdq $60, %r8, %rcx
|
||||
; SSE-NEXT: andq %r10, %rcx
|
||||
; SSE-NEXT: addq %rax, %rcx
|
||||
; SSE-NEXT: shrq $56, %r8
|
||||
; SSE-NEXT: addq %rcx, %r8
|
||||
; SSE-NEXT: movq %r8, %rax
|
||||
; SSE-NEXT: mulq %r11
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leaq (,%rdx,8), %rax
|
||||
; SSE-NEXT: subq %rax, %rdx
|
||||
; SSE-NEXT: addq %r8, %rdx
|
||||
; SSE-NEXT: movq %rdx, 16(%rdi)
|
||||
; SSE-NEXT: movq %rsi, (%rdi)
|
||||
; SSE-NEXT: movq $0, 24(%rdi)
|
||||
; SSE-NEXT: movq $0, 8(%rdi)
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v2i128_rem_by_7:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movq %rdx, %r9
|
||||
; AVX-NEXT: movabsq $1152921504606846975, %r10 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: movq %rsi, %rax
|
||||
; AVX-NEXT: andq %r10, %rax
|
||||
; AVX-NEXT: shrdq $60, %rdx, %rsi
|
||||
; AVX-NEXT: andq %r10, %rsi
|
||||
; AVX-NEXT: addq %rax, %rsi
|
||||
; AVX-NEXT: shrq $56, %r9
|
||||
; AVX-NEXT: addq %rsi, %r9
|
||||
; AVX-NEXT: movabsq $5270498306774157605, %r11 # imm = 0x4924924924924925
|
||||
; AVX-NEXT: movq %r9, %rax
|
||||
; AVX-NEXT: mulq %r11
|
||||
; AVX-NEXT: movq %rdx, %rsi
|
||||
; AVX-NEXT: shrq %rsi
|
||||
; AVX-NEXT: leaq (,%rsi,8), %rax
|
||||
; AVX-NEXT: subq %rax, %rsi
|
||||
; AVX-NEXT: addq %r9, %rsi
|
||||
; AVX-NEXT: movq %rcx, %rax
|
||||
; AVX-NEXT: andq %r10, %rax
|
||||
; AVX-NEXT: shrdq $60, %r8, %rcx
|
||||
; AVX-NEXT: andq %r10, %rcx
|
||||
; AVX-NEXT: addq %rax, %rcx
|
||||
; AVX-NEXT: shrq $56, %r8
|
||||
; AVX-NEXT: addq %rcx, %r8
|
||||
; AVX-NEXT: movq %r8, %rax
|
||||
; AVX-NEXT: mulq %r11
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leaq (,%rdx,8), %rax
|
||||
; AVX-NEXT: subq %rax, %rdx
|
||||
; AVX-NEXT: addq %r8, %rdx
|
||||
; AVX-NEXT: movq %rdx, 16(%rdi)
|
||||
; AVX-NEXT: movq %rsi, (%rdi)
|
||||
; AVX-NEXT: movq $0, 24(%rdi)
|
||||
; AVX-NEXT: movq $0, 8(%rdi)
|
||||
; AVX-NEXT: movq %rdi, %rax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%rem = urem <2 x i128> %x, <i128 7, i128 7>
|
||||
ret <2 x i128> %rem
|
||||
}
|
||||
|
||||
define <2 x i128> @v2i128_rem_by_14(<2 x i128> %x) nounwind {
|
||||
; SSE-LABEL: v2i128_rem_by_14:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: movq %rdx, %r9
|
||||
; SSE-NEXT: movq %rsi, %rax
|
||||
; SSE-NEXT: shrdq $1, %rdx, %rax
|
||||
; SSE-NEXT: movabsq $1152921504606846975, %r10 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: shldq $4, %rax, %rdx
|
||||
; SSE-NEXT: andq %r10, %rax
|
||||
; SSE-NEXT: andq %r10, %rdx
|
||||
; SSE-NEXT: shrq $57, %r9
|
||||
; SSE-NEXT: addq %rax, %r9
|
||||
; SSE-NEXT: addq %rdx, %r9
|
||||
; SSE-NEXT: movabsq $5270498306774157605, %r11 # imm = 0x4924924924924925
|
||||
; SSE-NEXT: movq %r9, %rax
|
||||
; SSE-NEXT: mulq %r11
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leal (,%rdx,8), %eax
|
||||
; SSE-NEXT: subl %eax, %edx
|
||||
; SSE-NEXT: addl %edx, %r9d
|
||||
; SSE-NEXT: andl $1, %esi
|
||||
; SSE-NEXT: leaq (%rsi,%r9,2), %rsi
|
||||
; SSE-NEXT: movq %rcx, %rax
|
||||
; SSE-NEXT: shrdq $1, %r8, %rax
|
||||
; SSE-NEXT: movq %r8, %rdx
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: shldq $4, %rax, %rdx
|
||||
; SSE-NEXT: andq %r10, %rax
|
||||
; SSE-NEXT: andq %r10, %rdx
|
||||
; SSE-NEXT: shrq $57, %r8
|
||||
; SSE-NEXT: addq %rax, %r8
|
||||
; SSE-NEXT: addq %rdx, %r8
|
||||
; SSE-NEXT: movq %r8, %rax
|
||||
; SSE-NEXT: mulq %r11
|
||||
; SSE-NEXT: shrq %rdx
|
||||
; SSE-NEXT: leal (,%rdx,8), %eax
|
||||
; SSE-NEXT: subl %eax, %edx
|
||||
; SSE-NEXT: addl %edx, %r8d
|
||||
; SSE-NEXT: andl $1, %ecx
|
||||
; SSE-NEXT: leaq (%rcx,%r8,2), %rax
|
||||
; SSE-NEXT: movq %rax, 16(%rdi)
|
||||
; SSE-NEXT: movq %rsi, (%rdi)
|
||||
; SSE-NEXT: movq $0, 24(%rdi)
|
||||
; SSE-NEXT: movq $0, 8(%rdi)
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v2i128_rem_by_14:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movq %rdx, %r9
|
||||
; AVX-NEXT: movq %rsi, %rax
|
||||
; AVX-NEXT: shrdq $1, %rdx, %rax
|
||||
; AVX-NEXT: movabsq $1152921504606846975, %r10 # imm = 0xFFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: shldq $4, %rax, %rdx
|
||||
; AVX-NEXT: andq %r10, %rax
|
||||
; AVX-NEXT: andq %r10, %rdx
|
||||
; AVX-NEXT: shrq $57, %r9
|
||||
; AVX-NEXT: addq %rax, %r9
|
||||
; AVX-NEXT: addq %rdx, %r9
|
||||
; AVX-NEXT: movabsq $5270498306774157605, %r11 # imm = 0x4924924924924925
|
||||
; AVX-NEXT: movq %r9, %rax
|
||||
; AVX-NEXT: mulq %r11
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leal (,%rdx,8), %eax
|
||||
; AVX-NEXT: subl %eax, %edx
|
||||
; AVX-NEXT: addl %edx, %r9d
|
||||
; AVX-NEXT: andl $1, %esi
|
||||
; AVX-NEXT: leaq (%rsi,%r9,2), %rsi
|
||||
; AVX-NEXT: movq %rcx, %rax
|
||||
; AVX-NEXT: shrdq $1, %r8, %rax
|
||||
; AVX-NEXT: movq %r8, %rdx
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: shldq $4, %rax, %rdx
|
||||
; AVX-NEXT: andq %r10, %rax
|
||||
; AVX-NEXT: andq %r10, %rdx
|
||||
; AVX-NEXT: shrq $57, %r8
|
||||
; AVX-NEXT: addq %rax, %r8
|
||||
; AVX-NEXT: addq %rdx, %r8
|
||||
; AVX-NEXT: movq %r8, %rax
|
||||
; AVX-NEXT: mulq %r11
|
||||
; AVX-NEXT: shrq %rdx
|
||||
; AVX-NEXT: leal (,%rdx,8), %eax
|
||||
; AVX-NEXT: subl %eax, %edx
|
||||
; AVX-NEXT: addl %edx, %r8d
|
||||
; AVX-NEXT: andl $1, %ecx
|
||||
; AVX-NEXT: leaq (%rcx,%r8,2), %rax
|
||||
; AVX-NEXT: movq %rax, 16(%rdi)
|
||||
; AVX-NEXT: movq %rsi, (%rdi)
|
||||
; AVX-NEXT: movq $0, 24(%rdi)
|
||||
; AVX-NEXT: movq $0, 8(%rdi)
|
||||
; AVX-NEXT: movq %rdi, %rax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%rem = urem <2 x i128> %x, <i128 14, i128 14>
|
||||
ret <2 x i128> %rem
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user