llvm-project/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
Craig Topper 1fa8fd4c33 Recommit "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."
This reverts commit 65aaecca8842dec30d03734a7fe8ce33c5afec81.

There was an ordering problem in the calculation of the partial
remainder.

Original commit message:

If the divisor is even, we can first shift the dividend and divisor
right by the number of trailing zeros. Now the divisor is odd and we
can do the original algorithm to calculate a remainder. Then we shift
that remainder left by the number of trailing zeros and add the bits
that were shifted out of the dividend.

Differential Revision: https://reviews.llvm.org/D135541
2022-10-24 10:08:50 -07:00

383 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen2/i64/g' %s | llc -mtriple=riscv32 -mattr=+m | \
; RUN: FileCheck %s --check-prefix=RV32
; RUN: sed 's/iXLen2/i128/g' %s | llc -mtriple=riscv64 -mattr=+m | \
; RUN: FileCheck %s --check-prefix=RV64
; urem of a double-XLEN integer by 3 (iXLen2 is rewritten to i64 or i128 by
; the sed in the run lines at the top of the file).
; Expected code on both targets is inline, with no libcall:
;   1. add/sltu/add sums the two halves and adds the carry-out back in;
;   2. mulhu multiplies that XLEN-wide sum by a magic constant
;      (lui 699051 / addi -1365 = 0xAAAAAAAB on riscv32, a constant-pool
;      load on riscv64);
;   3. with q = hi >> 1, q*3 is formed as (hi >> 1) + (hi & -2) and
;      subtracted from the sum.
; The upper half of the result is always materialized as zero (li a1, 0).
define iXLen2 @test_urem_3(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_3:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 699051
; RV32-NEXT: addi a1, a1, -1365
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a2, a1, 1
; RV32-NEXT: andi a1, a1, -2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_3:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI0_0)
; RV64-NEXT: ld a2, %lo(.LCPI0_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a2, a1, 1
; RV64-NEXT: andi a1, a1, -2
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 3
ret iXLen2 %a
}
; urem of a double-XLEN integer by 5.
; Expected code is inline on both targets: the halves are summed with the
; carry added back (add/sltu/add), the sum is multiplied high by a magic
; constant (mulhu), and with q = hi >> 2 the product q*5 is formed as
; (hi >> 2) + (hi & -4) and subtracted from the sum.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_5(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_5:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 838861
; RV32-NEXT: addi a1, a1, -819
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a2, a1, 2
; RV32-NEXT: andi a1, a1, -4
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_5:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI1_0)
; RV64-NEXT: ld a2, %lo(.LCPI1_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a2, a1, 2
; RV64-NEXT: andi a1, a1, -4
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 5
ret iXLen2 %a
}
; urem of a double-XLEN integer by 7.
; Negative case: the expected output on both targets is a library call
; (__umoddi3 on riscv32, __umodti3 on riscv64) with the divisor passed in
; a2/a3, rather than the inline add/mulhu sequence seen for 3 and 5.
; NOTE(review): presumably 7 is rejected because 2^32 and 2^64 are not
; congruent to 1 modulo 7, so the halves cannot simply be summed -- confirm
; against expandDIVREMByConstant.
define iXLen2 @test_urem_7(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_7:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: li a2, 7
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __umoddi3@plt
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_7:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: li a2, 7
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __umodti3@plt
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = urem iXLen2 %x, 7
ret iXLen2 %a
}
; urem of a double-XLEN integer by 9.
; Negative case: as with the divisor 7, both targets are expected to fall
; back to a library call (__umoddi3 on riscv32, __umodti3 on riscv64) with
; the constant 9 passed in a2 and a zero upper half in a3.
; NOTE(review): presumably not expanded because 2^32 and 2^64 are not
; congruent to 1 modulo 9 -- confirm against expandDIVREMByConstant.
define iXLen2 @test_urem_9(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_9:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: li a2, 9
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __umoddi3@plt
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_9:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: li a2, 9
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __umodti3@plt
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = urem iXLen2 %x, 9
ret iXLen2 %a
}
; urem of a double-XLEN integer by 15.
; Expected code is inline on both targets. After the halves are summed with
; the carry added back (add/sltu/add) and multiplied high by a magic
; constant, the quotient is q = hi >> 3. The product is not built with a
; mask here: the sequence computes q - (q << 4), i.e. -15*q, and adds that
; to the sum.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_15(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_15:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 559241
; RV32-NEXT: addi a1, a1, -1911
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: slli a2, a1, 4
; RV32-NEXT: sub a1, a1, a2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_15:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI4_0)
; RV64-NEXT: ld a2, %lo(.LCPI4_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: slli a2, a1, 4
; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 15
ret iXLen2 %a
}
; urem of a double-XLEN integer by 17.
; Expected code is inline on both targets: halves summed with the carry
; added back (add/sltu/add), mulhu by a magic constant, then with
; q = hi >> 4 the product q*17 is formed as (hi >> 4) + (hi & -16) and
; subtracted from the sum.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_17(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_17:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 986895
; RV32-NEXT: addi a1, a1, 241
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a2, a1, 4
; RV32-NEXT: andi a1, a1, -16
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_17:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI5_0)
; RV64-NEXT: ld a2, %lo(.LCPI5_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a2, a1, 4
; RV64-NEXT: andi a1, a1, -16
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 17
ret iXLen2 %a
}
; urem of a double-XLEN integer by 255.
; Expected code is inline on both targets and has the same shape as the
; divisor-15 case: halves summed with the carry added back, mulhu by a
; magic constant, q = hi >> 7, then q - (q << 8), i.e. -255*q, is added to
; the sum.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_255(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_255:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 526344
; RV32-NEXT: addi a1, a1, 129
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a1, a1, 7
; RV32-NEXT: slli a2, a1, 8
; RV32-NEXT: sub a1, a1, a2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_255:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI6_0)
; RV64-NEXT: ld a2, %lo(.LCPI6_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a1, a1, 7
; RV64-NEXT: slli a2, a1, 8
; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 255
ret iXLen2 %a
}
; urem of a double-XLEN integer by 257.
; Expected code is inline on both targets: halves summed with the carry
; added back (add/sltu/add), mulhu by a magic constant, then with
; q = hi >> 8 the product q*257 is formed as (hi >> 8) + (hi & -256) and
; subtracted from the sum.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_257(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_257:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 1044496
; RV32-NEXT: addi a1, a1, -255
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a2, a1, 8
; RV32-NEXT: andi a1, a1, -256
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_257:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI7_0)
; RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a2, a1, 8
; RV64-NEXT: andi a1, a1, -256
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 257
ret iXLen2 %a
}
; urem of a double-XLEN integer by 65535.
; Expected code is inline on both targets and follows the 15/255 shape:
; halves summed with the carry added back, mulhu by a magic constant,
; q = hi >> 15, then q - (q << 16), i.e. -65535*q, is added to the sum.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_65535(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_65535:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 524296
; RV32-NEXT: addi a1, a1, 1
; RV32-NEXT: mulhu a1, a0, a1
; RV32-NEXT: srli a1, a1, 15
; RV32-NEXT: slli a2, a1, 16
; RV32-NEXT: sub a1, a1, a2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_65535:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, %hi(.LCPI8_0)
; RV64-NEXT: ld a2, %lo(.LCPI8_0)(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: mulhu a1, a0, a2
; RV64-NEXT: srli a1, a1, 15
; RV64-NEXT: slli a2, a1, 16
; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 65535
ret iXLen2 %a
}
; urem of a double-XLEN integer by 65537.
; Expected code is inline on both targets, with two differences from the
; smaller 2^k+1 divisors (5, 17, 257):
;   * the mask that clears the low 16 bits of the mulhu result is held in a
;     register (lui 1048560) and applied with a register-register and,
;     instead of an andi immediate;
;   * on riscv32 that same lui value is reused, plus 1, as the magic
;     multiplier, and the two pieces of q*65537 are combined with or; on
;     riscv64 the multiplier is built inline with lui/slli/addi instead of a
;     constant-pool load, and the pieces are combined with add.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_65537(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_65537:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: lui a1, 1048560
; RV32-NEXT: addi a2, a1, 1
; RV32-NEXT: mulhu a2, a0, a2
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: srli a2, a2, 16
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_65537:
; RV64: # %bb.0:
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: sltu a0, a1, a0
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: lui a1, 983041
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: slli a1, a1, 16
; RV64-NEXT: addi a1, a1, 1
; RV64-NEXT: mulhu a1, a0, a1
; RV64-NEXT: lui a2, 1048560
; RV64-NEXT: and a2, a1, a2
; RV64-NEXT: srli a1, a1, 16
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 65537
ret iXLen2 %a
}
; urem of a double-XLEN integer by 12, an even divisor (12 = 3 << 2).
; Expected code is inline on both targets:
;   1. the double-width input is shifted right by 2 across both halves
;      (slli by XLEN-2 / srli 2 / or for the low half, srli 2 for the high
;      half);
;   2. the shifted halves go through the same sequence as the divisor-3
;      test: add/sltu/add fold, mulhu by the magic constant, then
;      (hi >> 1) + (hi & -2) subtracted from the sum;
;   3. that remainder is shifted left by 2 and the two low bits of the
;      original low half (andi a0, a0, 3) are OR-ed back in.
; The upper half of the result is zero (li a1, 0).
define iXLen2 @test_urem_12(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_12:
; RV32: # %bb.0:
; RV32-NEXT: slli a2, a1, 30
; RV32-NEXT: srli a3, a0, 2
; RV32-NEXT: or a2, a3, a2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: sltu a2, a1, a2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: lui a2, 699051
; RV32-NEXT: addi a2, a2, -1365
; RV32-NEXT: mulhu a2, a1, a2
; RV32-NEXT: srli a3, a2, 1
; RV32-NEXT: andi a2, a2, -2
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: sub a1, a1, a2
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: andi a0, a0, 3
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_12:
; RV64: # %bb.0:
; RV64-NEXT: slli a2, a1, 62
; RV64-NEXT: srli a3, a0, 2
; RV64-NEXT: or a2, a3, a2
; RV64-NEXT: srli a1, a1, 2
; RV64-NEXT: lui a3, %hi(.LCPI10_0)
; RV64-NEXT: ld a3, %lo(.LCPI10_0)(a3)
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: sltu a2, a1, a2
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: mulhu a2, a1, a3
; RV64-NEXT: srli a3, a2, 1
; RV64-NEXT: andi a2, a2, -2
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: andi a0, a0, 3
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: li a1, 0
; RV64-NEXT: ret
%a = urem iXLen2 %x, 12
ret iXLen2 %a
}