llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
Gaëtan Bossu 9828745661
[AArch64][ISel] Select constructive EXT_ZZI pseudo instruction (#152554)
The patch adds patterns to select the EXT_ZZI_CONSTRUCTIVE pseudo
instead of the EXT_ZZI destructive instruction for vector_splice. This
only works when the two inputs to vector_splice are identical.

Given that registers aren't tied anymore, this gives the register
allocator more freedom and a lot of MOVs get replaced with MOVPRFX.

In some cases however, we could have just chosen the same input and
output register, but regalloc preferred not to. This means we end up
with some test cases now having more instructions: there is now a
MOVPRFX while no MOV was previously needed.
2025-08-15 14:30:24 +01:00

2107 lines
81 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
;
; SREM
;
; Signed remainder of two <4 x i8> vectors passed and returned by value.
; Default-prefix expectation (the two SVE2 / SME run configurations): the
; bytes are sign-extended in .h lanes (sxtb), widened to .s lanes (sunpklo),
; divided with sdivr, the quotient is narrowed with uzp1, and the remainder
; is formed with mls, i.e. op1 - quotient * op2.
; NONEON-NOSVE expectation: both operands are spilled to the stack and every
; lane is computed with a scalar sdiv + msub pair, then the result is reloaded
; into d0.
define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-LABEL: srem_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p1.s, vl4
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14]
; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #20]
; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #12]
; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #18]
; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #10]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: ldrsb w17, [sp, #16]
; NONEON-NOSVE-NEXT: ldrsb w18, [sp, #8]
; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12
; NONEON-NOSVE-NEXT: strh w9, [sp, #28]
; NONEON-NOSVE-NEXT: sdiv w0, w18, w17
; NONEON-NOSVE-NEXT: msub w10, w16, w14, w15
; NONEON-NOSVE-NEXT: strh w10, [sp, #26]
; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = srem <4 x i8> %op1, %op2
ret <4 x i8> %res
}
; Signed remainder of two <8 x i8> vectors passed and returned by value.
; Default-prefix expectation: each operand is widened b -> h -> s with
; sunpklo; the elements starting at byte 8 of the .h intermediate are reached
; with "ext ..., #8". Two sdivr operations on .s lanes produce the quotients,
; which are narrowed with uzp1, joined with the two-register splice form and
; narrowed again to bytes before mls computes op1 - quotient * op2.
; NONEON-NOSVE expectation: the operands are spilled to the stack and the
; eight lanes are processed one at a time (ldrsb / sdiv / msub / strb).
define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: srem_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sunpklo z2.h, z1.b
; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sunpklo z4.s, z2.h
; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #31]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT: strb w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #29]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #27]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #25]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = srem <8 x i8> %op1, %op2
ret <8 x i8> %res
}
; Signed remainder of two <16 x i8> vectors passed and returned by value.
; Default-prefix expectation: the 16 bytes are processed as four groups of
; four .s lanes. The input bytes starting at offset 8 are obtained with a
; "movprfx + ext ..., #8" pair whose destination differs from its source
; register (z5 from z0, z3 from z1). Four sdivr operations produce the
; quotients, which are narrowed with uzp1 and reassembled with the
; two-register splice form at .h and then .b granularity; a final mls
; computes op1 - quotient * op2.
; NONEON-NOSVE expectation: the operands are pushed to the stack and the
; sixteen lanes are processed one at a time (ldrsb / sdiv / msub / strb).
define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: srem_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: sunpklo z2.h, z1.b
; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sunpklo z4.s, z2.h
; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
; CHECK-NEXT: sunpklo z5.h, z5.b
; CHECK-NEXT: sunpklo z7.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
; CHECK-NEXT: sunpklo z5.s, z5.h
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: sunpklo z3.h, z3.b
; CHECK-NEXT: sunpklo z6.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h
; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b
; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7]
; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6]
; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5]
; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4]
; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3]
; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2]
; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1]
; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = srem <16 x i8> %op1, %op2
ret <16 x i8> %res
}
; Signed remainder of two <32 x i8> vectors: op1 is loaded from %a, op2 from
; %b, and the result is stored back to %a.
; Default-prefix expectation: the 32 bytes are handled as two 16-byte
; registers, each split into four groups of .s lanes via sunpklo and
; "ext ..., #8" (several of them as "movprfx + ext" with a destination that
; differs from the source). Eight sdiv/sdivr operations produce the
; quotients, which are narrowed with uzp1 and rebuilt with the two-register
; splice form; two mls instructions then compute op1 - quotient * op2 and a
; single stp writes both halves to [x0].
; NONEON-NOSVE expectation: both operands are copied to a 96-byte stack
; frame and the thirty-two lanes are processed one at a time
; (ldrsb / sdiv / msub / strb) before the result is copied to [x0].
define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: srem_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sunpklo z2.h, z1.b
; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: sunpklo z4.s, z2.h
; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z2.s
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
; CHECK-NEXT: sunpklo z7.h, z2.b
; CHECK-NEXT: sunpklo z16.h, z3.b
; CHECK-NEXT: sunpklo z2.s, z7.h
; CHECK-NEXT: sunpklo z3.s, z16.h
; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
; CHECK-NEXT: sunpklo z7.s, z7.h
; CHECK-NEXT: movprfx z6, z3
; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z2.s
; CHECK-NEXT: ldr q2, [x0]
; CHECK-NEXT: ldr q3, [x1]
; CHECK-NEXT: sunpklo z16.s, z16.h
; CHECK-NEXT: sunpklo z17.h, z3.b
; CHECK-NEXT: sunpklo z18.h, z2.b
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: sunpklo z19.s, z17.h
; CHECK-NEXT: sunpklo z20.s, z18.h
; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: sunpklo z17.s, z17.h
; CHECK-NEXT: sunpklo z18.s, z18.h
; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s
; CHECK-NEXT: movprfx z20, z2
; CHECK-NEXT: ext z20.b, z20.b, z2.b, #8
; CHECK-NEXT: sunpklo z20.h, z20.b
; CHECK-NEXT: sunpklo z22.s, z20.h
; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8
; CHECK-NEXT: sunpklo z20.s, z20.h
; CHECK-NEXT: sdivr z17.s, p0/m, z17.s, z18.s
; CHECK-NEXT: movprfx z18, z3
; CHECK-NEXT: ext z18.b, z18.b, z3.b, #8
; CHECK-NEXT: sunpklo z18.h, z18.b
; CHECK-NEXT: sunpklo z21.s, z18.h
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: sunpklo z18.s, z18.h
; CHECK-NEXT: sdivr z21.s, p0/m, z21.s, z22.s
; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z20.s
; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
; CHECK-NEXT: uzp1 z16.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z17.h, z7.h, z7.h
; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
; CHECK-NEXT: uzp1 z4.h, z21.h, z21.h
; CHECK-NEXT: splice z6.h, p0, { z19.h, z20.h }
; CHECK-NEXT: uzp1 z5.h, z18.h, z18.h
; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
; CHECK-NEXT: splice z5.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z16.b, z6.b, z6.b
; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z17.b, z4.b, z4.b
; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
; CHECK-NEXT: splice z4.b, p0, { z16.b, z17.b }
; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b }
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mls z2.b, p0/m, z4.b, z3.b
; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #63]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #47]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #46]
; NONEON-NOSVE-NEXT: strb w8, [sp, #95]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #62]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #45]
; NONEON-NOSVE-NEXT: strb w8, [sp, #94]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #61]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #44]
; NONEON-NOSVE-NEXT: strb w8, [sp, #93]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #60]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #43]
; NONEON-NOSVE-NEXT: strb w8, [sp, #92]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #59]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #42]
; NONEON-NOSVE-NEXT: strb w8, [sp, #91]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #58]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #41]
; NONEON-NOSVE-NEXT: strb w8, [sp, #90]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #57]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #40]
; NONEON-NOSVE-NEXT: strb w8, [sp, #89]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #56]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #39]
; NONEON-NOSVE-NEXT: strb w8, [sp, #88]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #55]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #38]
; NONEON-NOSVE-NEXT: strb w8, [sp, #87]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #54]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #37]
; NONEON-NOSVE-NEXT: strb w8, [sp, #86]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #53]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #36]
; NONEON-NOSVE-NEXT: strb w8, [sp, #85]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #52]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #35]
; NONEON-NOSVE-NEXT: strb w8, [sp, #84]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #51]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #34]
; NONEON-NOSVE-NEXT: strb w8, [sp, #83]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #50]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #33]
; NONEON-NOSVE-NEXT: strb w8, [sp, #82]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #49]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #32]
; NONEON-NOSVE-NEXT: strb w8, [sp, #81]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT: strb w8, [sp, #80]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #79]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #77]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #75]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #73]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7]
; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6]
; NONEON-NOSVE-NEXT: strb w8, [sp, #71]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5]
; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4]
; NONEON-NOSVE-NEXT: strb w8, [sp, #69]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3]
; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2]
; NONEON-NOSVE-NEXT: strb w8, [sp, #67]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1]
; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsb w9, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #65]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = srem <32 x i8> %op1, %op2
store <32 x i8> %res, ptr %a
ret void
}
; Signed remainder of two <4 x i16> vectors passed and returned by value.
; Default-prefix expectation: both operands are widened to .s lanes with
; sunpklo, divided with a single sdivr, the quotient is narrowed with uzp1,
; and mls on .h lanes computes op1 - quotient * op2.
; NONEON-NOSVE expectation: the operands are spilled to the stack and the
; four lanes are processed one at a time (ldrsh / sdiv / msub / strh).
define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: srem_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12]
; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = srem <4 x i16> %op1, %op2
ret <4 x i16> %res
}
; Signed remainder of two <8 x i16> vectors passed and returned by value.
; Default-prefix expectation: the first four halfwords of each operand are
; widened directly with sunpklo; the halfwords starting at byte 8 are first
; moved down with a "movprfx + ext ..., #8" pair into a fresh register (z4
; from z0, z3 from z1) and then widened. Two sdivr operations give the
; quotients, which are narrowed with uzp1 and joined with the two-register
; splice form before mls computes op1 - quotient * op2.
; NONEON-NOSVE expectation: the operands are pushed to the stack and the
; eight lanes are processed one at a time (ldrsh / sdiv / msub / strh).
define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: srem_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sunpklo z4.s, z4.h
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12]
; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4]
; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2]
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = srem <8 x i16> %op1, %op2
ret <8 x i16> %res
}
; Signed remainder of two <16 x i16> vectors: op1 is loaded from %a, op2 from
; %b, and the result is stored back to %a.
; Default-prefix expectation: the data is handled as two 16-byte registers
; per operand. For each register the first four halfwords are widened
; directly with sunpklo and the halfwords starting at byte 8 are fetched with
; a "movprfx + ext ..., #8" pair into a fresh register. Four sdivr operations
; give the quotients, which are narrowed with uzp1 and joined with the
; two-register splice form. The remainder of one half is formed with msb and
; the other with mls, and a single stp writes both halves to [x0].
; NONEON-NOSVE expectation: both operands are copied to a 96-byte stack
; frame and the sixteen lanes are processed one at a time
; (ldrsh / sdiv / msub / strh) before the result is copied to [x0].
define void @srem_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: srem_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q4, q1, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: sunpklo z5.s, z4.h
; CHECK-NEXT: movprfx z16, z0
; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ldr q3, [x0]
; CHECK-NEXT: sunpklo z16.s, z16.h
; CHECK-NEXT: sunpklo z6.s, z3.h
; CHECK-NEXT: movprfx z7, z3
; CHECK-NEXT: ext z7.b, z7.b, z3.b, #8
; CHECK-NEXT: sunpklo z7.s, z7.h
; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: movprfx z6, z4
; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8
; CHECK-NEXT: sunpklo z6.s, z6.h
; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: movprfx z7, z1
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
; CHECK-NEXT: sunpklo z7.s, z7.h
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h
; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62]
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44]
; NONEON-NOSVE-NEXT: strh w8, [sp, #94]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #92]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40]
; NONEON-NOSVE-NEXT: strh w8, [sp, #90]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38]
; NONEON-NOSVE-NEXT: strh w8, [sp, #88]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36]
; NONEON-NOSVE-NEXT: strh w8, [sp, #86]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34]
; NONEON-NOSVE-NEXT: strh w8, [sp, #84]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32]
; NONEON-NOSVE-NEXT: strh w8, [sp, #82]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14]
; NONEON-NOSVE-NEXT: strh w8, [sp, #80]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12]
; NONEON-NOSVE-NEXT: strh w8, [sp, #78]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w8, [sp, #74]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6]
; NONEON-NOSVE-NEXT: strh w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4]
; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2]
; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrsh w9, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = srem <16 x i16> %op1, %op2
store <16 x i16> %res, ptr %a
ret void
}
; Signed remainder of two <2 x i32> vectors passed and returned by value.
; Default-prefix expectation: no widening is needed; op1 is copied with
; movprfx, divided by op2 with sdiv on .s lanes under a vl2 predicate, and
; mls computes op1 - quotient * op2.
; NONEON-NOSVE expectation: the operands are spilled to the stack and the two
; lanes are processed one at a time (sdiv / msub / str).
define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: srem_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #8]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w11, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT: str w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: str w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = srem <2 x i32> %op1, %op2
ret <2 x i32> %res
}
; Signed remainder of two <4 x i32> vectors passed and returned by value.
; Default-prefix expectation: no widening is needed; op1 is copied with
; movprfx, divided by op2 with sdiv on .s lanes under a vl4 predicate, and
; mls computes op1 - quotient * op2.
; NONEON-NOSVE expectation: the operands are pushed to the stack and the four
; lanes are processed one at a time with sdiv / msub; two of the results are
; written together with a single stp.
define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: srem_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #8]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT: sdiv w10, w11, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT: str w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #4]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldr w9, [sp]
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #36]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: str w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = srem <4 x i32> %op1, %op2
ret <4 x i32> %res
}
; Signed remainder of two <8 x i32> vectors loaded from %a and %b; the result
; is stored back to %a.
; SVE/SME runs (CHECK prefix): the 256-bit vectors are handled as two 128-bit
; halves with one predicated sdiv (vl4) each. One half finishes with msb (the
; result lands in the register that held the divisor), the other with
; movprfx + mls.
; Run without SVE (NONEON-NOSVE prefix): both inputs are copied to a 96-byte
; stack frame and all eight lanes use scalar sdiv + msub; the result is built
; at [sp, #64] and stored to [x0].
define void @srem_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: srem_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z0.s
; CHECK-NEXT: movprfx z5, z2
; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z3.s
; CHECK-NEXT: msb z0.s, p0/m, z4.s, z1.s
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #40]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #60]
; NONEON-NOSVE-NEXT: sdiv w10, w11, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT: str w8, [sp, #92]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #56]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT: ldr w8, [sp, #52]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #36]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldr w9, [sp, #32]
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #84]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #48]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #76]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #4]
; NONEON-NOSVE-NEXT: sdiv w10, w11, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT: str w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w9, [sp]
; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = srem <8 x i32> %op1, %op2
store <8 x i32> %res, ptr %a
ret void
}
; Signed remainder of two single-element <1 x i64> vectors.
; SVE/SME runs (CHECK prefix): predicated sdiv with a one-lane predicate
; (ptrue p0.d, vl1) followed by mls.
; Run without SVE (NONEON-NOSVE prefix): the operands are moved to general
; registers with fmov, the remainder is formed with sdiv + msub, and the value
; goes through a stack slot to get back into d0.
define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: srem_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: fmov x8, d1
; NONEON-NOSVE-NEXT: fmov x9, d0
; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT: str x8, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = srem <1 x i64> %op1, %op2
ret <1 x i64> %res
}
; Signed remainder of two <2 x i64> values passed and returned by value.
; SVE/SME runs (CHECK prefix): one predicated 64-bit sdiv (vl2) into a scratch
; register, then mls computes op1 - quotient * op2 in z0.
; Run without SVE (NONEON-NOSVE prefix): q0/q1 are pushed to the stack and both
; lanes are computed with scalar 64-bit sdiv + msub.
define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: srem_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp x9, x11, [sp]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT: sdiv x10, x11, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x11
; NONEON-NOSVE-NEXT: str x8, [sp, #40]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT: str x8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = srem <2 x i64> %op1, %op2
ret <2 x i64> %res
}
; Signed remainder of two <4 x i64> vectors loaded from %a and %b; the result
; is stored back to %a.
; SVE/SME runs (CHECK prefix): two 128-bit halves, each with one predicated
; 64-bit sdiv (vl2). One half finishes with msb (result in the register that
; held the divisor), the other with movprfx + mls.
; Run without SVE (NONEON-NOSVE prefix): inputs are copied to a 96-byte stack
; frame and the four lanes use scalar sdiv + msub.
define void @srem_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: srem_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: sdiv z4.d, p0/m, z4.d, z0.d
; CHECK-NEXT: movprfx z5, z2
; CHECK-NEXT: sdiv z5.d, p0/m, z5.d, z3.d
; CHECK-NEXT: msb z0.d, p0/m, z4.d, z1.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp x9, x11, [sp, #32]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #56]
; NONEON-NOSVE-NEXT: sdiv x10, x11, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x11
; NONEON-NOSVE-NEXT: str x8, [sp, #88]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #48]
; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x11, x10, x8, x9
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr x9, [sp, #8]
; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT: ldr x9, [sp]
; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #72]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT: str x8, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = srem <4 x i64> %op1, %op2
store <4 x i64> %res, ptr %a
ret void
}
;
; UREM
;
; Unsigned remainder of two <4 x i8> values passed and returned by value.
; SVE/SME runs (CHECK prefix): the operands are treated as 16-bit elements and
; masked with #0xff, widened to 32 bits with uunpklo for a predicated udivr
; (vl4), narrowed again with uzp1, and the remainder is formed with mls on
; 16-bit elements.
; Run without SVE (NONEON-NOSVE prefix): the lanes are read with ldrb at
; 2-byte strides from the spilled operands, all four udiv/msub pairs are
; interleaved, and the results are stored as halfwords.
define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-LABEL: urem_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z0.h
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #12]
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #10]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #8]
; NONEON-NOSVE-NEXT: udiv w13, w12, w11
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
; NONEON-NOSVE-NEXT: udiv w16, w15, w14
; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12
; NONEON-NOSVE-NEXT: strh w9, [sp, #28]
; NONEON-NOSVE-NEXT: udiv w0, w18, w17
; NONEON-NOSVE-NEXT: msub w10, w16, w14, w15
; NONEON-NOSVE-NEXT: strh w10, [sp, #26]
; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = urem <4 x i8> %op1, %op2
ret <4 x i8> %res
}
; Unsigned remainder of two <8 x i8> values passed and returned by value.
; SVE/SME runs (CHECK prefix): bytes are widened to 16 and then 32 bits with
; uunpklo. The low four lanes and the upper four (brought down with ext #8) are
; each divided with udivr (vl4). The quotients are narrowed with uzp1,
; joined with splice, narrowed to bytes, and mls on byte elements (vl8)
; produces the remainder.
; Run without SVE (NONEON-NOSVE prefix): eight scalar udiv + msub pairs on
; bytes loaded from the spilled operands.
define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: urem_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z2.h, z1.b
; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z4.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #31]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
; NONEON-NOSVE-NEXT: strb w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #29]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #27]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #25]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = urem <8 x i8> %op1, %op2
ret <8 x i8> %res
}
; Unsigned remainder of two <16 x i8> values passed and returned by value.
; SVE/SME runs (CHECK prefix): the sixteen byte lanes are widened to 32 bits
; and divided in four groups of four with udivr (vl4). The upper eight bytes of
; each input are extracted with a movprfx + ext #8 pair that writes a fresh
; register, leaving z0/z1 intact for the final mls. The quotients are narrowed
; with uzp1 and reassembled with splice at halfword and byte granularity before
; mls on byte elements (vl16).
; Run without SVE (NONEON-NOSVE prefix): sixteen scalar udiv + msub pairs on
; bytes loaded from the spilled operands.
define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: urem_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.h, z1.b
; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z4.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
; CHECK-NEXT: uunpklo z5.h, z5.b
; CHECK-NEXT: uunpklo z7.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
; CHECK-NEXT: uunpklo z5.s, z5.h
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: uunpklo z3.h, z3.b
; CHECK-NEXT: uunpklo z6.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h
; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b
; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = urem <16 x i8> %op1, %op2
ret <16 x i8> %res
}
; Unsigned remainder of two <32 x i8> vectors loaded from %a and %b; the result
; is stored back to %a.
; SVE/SME runs (CHECK prefix): each 16-byte half is widened to 32-bit lanes and
; divided in four groups of four, giving eight predicated udiv/udivr
; instructions (vl4) in total. The upper eight bytes of every input register
; are extracted with movprfx + ext #8 into a separate register so the loaded
; values stay available for the final mls. Quotients are narrowed with uzp1,
; merged with splice, and two mls instructions on byte elements (vl16) produce
; the halves stored with stp.
; Run without SVE (NONEON-NOSVE prefix): inputs are copied to a 96-byte stack
; frame and all 32 lanes use scalar udiv + msub on bytes.
define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: urem_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z2.h, z1.b
; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: uunpklo z4.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z2.s
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
; CHECK-NEXT: uunpklo z7.h, z2.b
; CHECK-NEXT: uunpklo z16.h, z3.b
; CHECK-NEXT: uunpklo z2.s, z7.h
; CHECK-NEXT: uunpklo z3.s, z16.h
; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
; CHECK-NEXT: uunpklo z7.s, z7.h
; CHECK-NEXT: movprfx z6, z3
; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z2.s
; CHECK-NEXT: ldr q2, [x0]
; CHECK-NEXT: ldr q3, [x1]
; CHECK-NEXT: uunpklo z16.s, z16.h
; CHECK-NEXT: uunpklo z17.h, z3.b
; CHECK-NEXT: uunpklo z18.h, z2.b
; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: uunpklo z19.s, z17.h
; CHECK-NEXT: uunpklo z20.s, z18.h
; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: uunpklo z17.s, z17.h
; CHECK-NEXT: uunpklo z18.s, z18.h
; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s
; CHECK-NEXT: movprfx z20, z2
; CHECK-NEXT: ext z20.b, z20.b, z2.b, #8
; CHECK-NEXT: uunpklo z20.h, z20.b
; CHECK-NEXT: uunpklo z22.s, z20.h
; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8
; CHECK-NEXT: uunpklo z20.s, z20.h
; CHECK-NEXT: udivr z17.s, p0/m, z17.s, z18.s
; CHECK-NEXT: movprfx z18, z3
; CHECK-NEXT: ext z18.b, z18.b, z3.b, #8
; CHECK-NEXT: uunpklo z18.h, z18.b
; CHECK-NEXT: uunpklo z21.s, z18.h
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: uunpklo z18.s, z18.h
; CHECK-NEXT: udivr z21.s, p0/m, z21.s, z22.s
; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z20.s
; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
; CHECK-NEXT: uzp1 z16.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z17.h, z7.h, z7.h
; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
; CHECK-NEXT: uzp1 z4.h, z21.h, z21.h
; CHECK-NEXT: splice z6.h, p0, { z19.h, z20.h }
; CHECK-NEXT: uzp1 z5.h, z18.h, z18.h
; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
; CHECK-NEXT: splice z5.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z16.b, z6.b, z6.b
; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z17.b, z4.b, z4.b
; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
; CHECK-NEXT: splice z4.b, p0, { z16.b, z17.b }
; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b }
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mls z2.b, p0/m, z4.b, z3.b
; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
; NONEON-NOSVE-NEXT: strb w8, [sp, #95]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45]
; NONEON-NOSVE-NEXT: strb w8, [sp, #94]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
; NONEON-NOSVE-NEXT: strb w8, [sp, #93]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43]
; NONEON-NOSVE-NEXT: strb w8, [sp, #92]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42]
; NONEON-NOSVE-NEXT: strb w8, [sp, #91]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41]
; NONEON-NOSVE-NEXT: strb w8, [sp, #90]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
; NONEON-NOSVE-NEXT: strb w8, [sp, #89]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39]
; NONEON-NOSVE-NEXT: strb w8, [sp, #88]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38]
; NONEON-NOSVE-NEXT: strb w8, [sp, #87]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37]
; NONEON-NOSVE-NEXT: strb w8, [sp, #86]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36]
; NONEON-NOSVE-NEXT: strb w8, [sp, #85]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35]
; NONEON-NOSVE-NEXT: strb w8, [sp, #84]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34]
; NONEON-NOSVE-NEXT: strb w8, [sp, #83]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33]
; NONEON-NOSVE-NEXT: strb w8, [sp, #82]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32]
; NONEON-NOSVE-NEXT: strb w8, [sp, #81]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT: strb w8, [sp, #80]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #79]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #77]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #75]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #73]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
; NONEON-NOSVE-NEXT: strb w8, [sp, #71]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
; NONEON-NOSVE-NEXT: strb w8, [sp, #69]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
; NONEON-NOSVE-NEXT: strb w8, [sp, #67]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrb w9, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #65]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = urem <32 x i8> %op1, %op2
store <32 x i8> %res, ptr %a
ret void
}
; Unsigned remainder of two <4 x i16> values passed and returned by value.
; SVE/SME runs (CHECK prefix): halfwords are widened to 32 bits with uunpklo
; for a predicated udivr (vl4), the quotient is narrowed with uzp1, and mls on
; 16-bit elements (vl4) forms op1 - quotient * op2.
; Run without SVE (NONEON-NOSVE prefix): four scalar udiv + msub pairs on
; halfwords loaded with ldrh from the spilled operands.
define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: urem_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z0.h
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = urem <4 x i16> %op1, %op2
ret <4 x i16> %res
}
; Unsigned remainder of two <8 x i16> values passed and returned by value.
; SVE/SME runs (CHECK prefix): the low four halfwords are widened directly with
; uunpklo. The upper four are first extracted with a movprfx + ext #8 pair that
; writes a fresh register, so z0/z1 remain intact for the final mls. Each group
; is divided with udivr (vl4), narrowed with uzp1, joined with splice, and
; mls on 16-bit elements (vl8) forms the remainder.
; Run without SVE (NONEON-NOSVE prefix): eight scalar udiv + msub pairs on
; halfwords loaded from the spilled operands.
define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: urem_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z0.h
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z4.s, z4.h
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: ldrh w9, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: udiv w10, w9, w8
; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = urem <8 x i16> %op1, %op2
ret <8 x i16> %res
}
; Unsigned remainder of two <16 x i16> vectors loaded from %a and %b; the
; result is stored back to %a.
; SVE2/SME runs: the expected code widens groups of four halfwords to 32-bit
; lanes with uunpklo (movprfx + ext #8 is used to bring the upper 8 bytes of
; each 128-bit register into the low half first), divides with udivr under a
; vl4 word predicate, narrows the quotients with uzp1, rejoins them with
; splice, and forms the remainders with msb/mls under a vl8 halfword predicate.
; Run without -mattr: both operands are spilled to the stack and the expected
; code is sixteen scalar udiv + msub pairs, one per lane.
define void @urem_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: urem_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q4, q1, [x1]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldr q0, [x0, #16]
; CHECK-NEXT:    uunpklo z2.s, z1.h
; CHECK-NEXT:    uunpklo z3.s, z0.h
; CHECK-NEXT:    uunpklo z5.s, z4.h
; CHECK-NEXT:    movprfx z16, z0
; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT:    ldr q3, [x0]
; CHECK-NEXT:    uunpklo z16.s, z16.h
; CHECK-NEXT:    uunpklo z6.s, z3.h
; CHECK-NEXT:    movprfx z7, z3
; CHECK-NEXT:    ext z7.b, z7.b, z3.b, #8
; CHECK-NEXT:    uunpklo z7.s, z7.h
; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT:    movprfx z6, z4
; CHECK-NEXT:    ext z6.b, z6.b, z4.b, #8
; CHECK-NEXT:    uunpklo z6.s, z6.h
; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT:    movprfx z7, z1
; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
; CHECK-NEXT:    uunpklo z7.s, z7.h
; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
; CHECK-NEXT:    stp q2, q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v16i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = urem <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}
; Unsigned remainder of two <2 x i32> vectors passed and returned by value.
; SVE2/SME runs: the expected code is a single predicated udiv (ptrue p0.s,
; vl2) into a movprfx copy of the dividend, followed by mls, which subtracts
; quotient * divisor from the dividend held in z0.
; Run without -mattr: the operands are stored to the stack and the expected
; code is two scalar udiv + msub pairs, one per lane.
define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: urem_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v2i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT:    str w8, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    str w8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %res = urem <2 x i32> %op1, %op2
  ret <2 x i32> %res
}
; Unsigned remainder of two <4 x i32> vectors passed and returned by value.
; SVE2/SME runs: same shape as the two-lane case but with a vl4 word
; predicate: movprfx + udiv produces the quotient, then mls subtracts
; quotient * divisor from the dividend held in z0.
; Run without -mattr: the operands are pushed to the stack with a pre-indexed
; stp and the expected code is four scalar udiv + msub pairs, one per lane.
define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: urem_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v4i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT:    str w8, [sp, #44]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #4]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldr w9, [sp]
; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #36]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    str w8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %res = urem <4 x i32> %op1, %op2
  ret <4 x i32> %res
}
; Unsigned remainder of two <8 x i32> vectors loaded from %a and %b; the
; result is stored back to %a.
; SVE2/SME runs: the 256-bit operands are handled as two 128-bit halves. Each
; half gets a movprfx + udiv under a vl4 word predicate; the remainder is then
; formed with msb for the first half (result lands in the register that held
; the divisor) and movprfx + mls for the second half.
; Run without -mattr: the operands are copied to the stack and the expected
; code is eight scalar udiv + msub pairs, one per lane.
define void @urem_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: urem_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    movprfx z4, z1
; CHECK-NEXT:    udiv z4.s, p0/m, z4.s, z0.s
; CHECK-NEXT:    movprfx z5, z2
; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z3.s
; CHECK-NEXT:    msb z0.s, p0/m, z4.s, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    mls z1.s, p0/m, z5.s, z3.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v8i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT:    str w8, [sp, #92]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #36]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #32]
; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #84]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #76]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #4]
; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
; NONEON-NOSVE-NEXT:    str w8, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr w9, [sp]
; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = urem <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}
; Unsigned remainder of two <1 x i64> vectors passed and returned by value.
; SVE2/SME runs: the expected code is movprfx + udiv on doubleword lanes under
; a vl1 predicate, followed by mls to subtract quotient * divisor from the
; dividend held in z0.
; Run without -mattr: the single lane is moved to general-purpose registers
; with fmov, computed with one scalar udiv + msub, and returned through a
; stack slot.
define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: urem_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v1i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    fmov x8, d1
; NONEON-NOSVE-NEXT:    fmov x9, d0
; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = urem <1 x i64> %op1, %op2
  ret <1 x i64> %res
}
; Unsigned remainder of two <2 x i64> vectors passed and returned by value.
; SVE2/SME runs: the expected code is movprfx + udiv on doubleword lanes under
; a vl2 predicate, followed by mls to subtract quotient * divisor from the
; dividend held in z0.
; Run without -mattr: the operands are pushed to the stack with a pre-indexed
; stp and the expected code is two scalar 64-bit udiv + msub pairs.
define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: urem_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldp x9, x11, [sp]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT:    udiv x10, x11, x8
; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x11
; NONEON-NOSVE-NEXT:    str x8, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT:    str x8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %res = urem <2 x i64> %op1, %op2
  ret <2 x i64> %res
}
; Unsigned remainder of two <4 x i64> vectors loaded from %a and %b; the
; result is stored back to %a.
; SVE2/SME runs: the 256-bit operands are handled as two 128-bit halves. Each
; half gets a movprfx + udiv on doubleword lanes under a vl2 predicate; the
; remainder is then formed with msb for the first half (result lands in the
; register that held the divisor) and movprfx + mls for the second half.
; Run without -mattr: the operands are copied to the stack and the expected
; code is four scalar 64-bit udiv + msub pairs, one per lane.
define void @urem_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: urem_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    movprfx z4, z1
; CHECK-NEXT:    udiv z4.d, p0/m, z4.d, z0.d
; CHECK-NEXT:    movprfx z5, z2
; CHECK-NEXT:    udiv z5.d, p0/m, z5.d, z3.d
; CHECK-NEXT:    msb z0.d, p0/m, z4.d, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    mls z1.d, p0/m, z5.d, z3.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: urem_v4i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp x9, x11, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #56]
; NONEON-NOSVE-NEXT:    udiv x10, x11, x8
; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x11
; NONEON-NOSVE-NEXT:    str x8, [sp, #88]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #48]
; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
; NONEON-NOSVE-NEXT:    msub x11, x10, x8, x9
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #8]
; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT:    ldr x9, [sp]
; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
; NONEON-NOSVE-NEXT:    str x8, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = urem <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}