
The patch adds patterns to select the EXT_ZZI_CONSTRUCTIVE pseudo instead of the EXT_ZZI destructive instruction for vector_splice. This only works when the two inputs to vector_splice are identical. Given that registers aren't tied anymore, this gives the register allocator more freedom and a lot of MOVs get replaced with MOVPRFX. In some cases however, we could have just chosen the same input and output register, but regalloc preferred not to. This means we end up with some test cases now having more instructions: there is now a MOVPRFX while no MOV was previously needed.
3016 lines
116 KiB
LLVM
3016 lines
116 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; The first three configurations above (+sve, +sve2, +sme) share the common
; CHECK prefix; the +sme configuration reuses the SVE2 prefix. The last
; configuration passes no -mattr and is verified under its own prefix.

target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;
|
|
; sext i1 -> i32
|
|
;
|
|
|
|
; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
|
|
; type's element type is not byte based and thus cannot be lowered directly to
|
|
; an SVE instruction.
|
|
; Sign-extends the <8 x i1> argument %a to <8 x i32> and stores the result
; through %out.
define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
; CHECK-LABEL: sext_v8i1_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: lsl z1.s, z1.s, #31
; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i1_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #48
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10]
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9]
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13]
; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #14]
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #15]
; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #40]
; NONEON-NOSVE-NEXT: sbfx w8, w14, #0, #1
; NONEON-NOSVE-NEXT: sbfx w10, w15, #0, #1
; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #32]
; NONEON-NOSVE-NEXT: sbfx w12, w13, #0, #1
; NONEON-NOSVE-NEXT: sbfx w8, w11, #0, #1
; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #24]
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #16]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %b = sext <8 x i1> %a to <8 x i32>
  store <8 x i32> %b, ptr %out
  ret void
}
|
|
|
|
;
|
|
; sext i3 -> i64
|
|
;
|
|
|
|
; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
|
|
; type's element type is not power-of-2 based and thus cannot be lowered
|
|
; directly to an SVE instruction.
|
|
; Sign-extends the <4 x i3> argument %a to <4 x i64> and stores the result
; through %out.
define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
; CHECK-LABEL: sext_v4i3_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: lsl z0.d, z0.d, #61
; CHECK-NEXT: lsl z1.d, z1.d, #61
; CHECK-NEXT: asr z0.d, z0.d, #61
; CHECK-NEXT: asr z1.d, z1.d, #61
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v4i3_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldp d0, d1, [sp]
; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #16]
; NONEON-NOSVE-NEXT: sbfx x8, x8, #0, #3
; NONEON-NOSVE-NEXT: sbfx x9, x9, #0, #3
; NONEON-NOSVE-NEXT: sbfx x10, x10, #0, #3
; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48]
; NONEON-NOSVE-NEXT: sbfx x8, x11, #0, #3
; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %b = sext <4 x i3> %a to <4 x i64>
  store <4 x i64> %b, ptr %out
  ret void
}
|
|
|
|
;
|
|
; sext i8 -> i16
|
|
;
|
|
|
|
; Sign-extends the <16 x i8> argument %a to <16 x i16> and stores the result
; through %out.
define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
; CHECK-LABEL: sext_v16i8_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31]
; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29]
; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27]
; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25]
; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23]
; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21]
; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19]
; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17]
; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %b = sext <16 x i8> %a to <16 x i16>
  store <16 x i16> %b, ptr %out
  ret void
}
|
|
|
|
; NOTE: Extra 'add' is to prevent the extend being combined with the load.
|
|
; Loads <32 x i8> from %in, adds the value to itself, sign-extends the sum to
; <32 x i16> and stores the result through %out.
define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
; CHECK-LABEL: sext_v32i8_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: sunpklo z2.h, z2.b
; CHECK-NEXT: sunpklo z3.h, z3.b
; CHECK-NEXT: stp q1, q3, [x1]
; CHECK-NEXT: stp q0, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #208
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w30, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w29, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: add w17, w17, w17
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
; NONEON-NOSVE-NEXT: add w5, w14, w14
; NONEON-NOSVE-NEXT: sxtb w6, w17
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17]
; NONEON-NOSVE-NEXT: sxtb w5, w5
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldrb w27, [sp, #30]
; NONEON-NOSVE-NEXT: strh w6, [sp, #78]
; NONEON-NOSVE-NEXT: add w6, w30, w30
; NONEON-NOSVE-NEXT: sxtb w8, w8
; NONEON-NOSVE-NEXT: ldrb w28, [sp, #31]
; NONEON-NOSVE-NEXT: sxtb w6, w6
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strh w5, [sp, #72]
; NONEON-NOSVE-NEXT: add w5, w29, w29
; NONEON-NOSVE-NEXT: ldrb w25, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w26, [sp, #29]
; NONEON-NOSVE-NEXT: sxtb w9, w9
; NONEON-NOSVE-NEXT: sxtb w5, w5
; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: ldrb w23, [sp, #26]
; NONEON-NOSVE-NEXT: strh w6, [sp, #70]
; NONEON-NOSVE-NEXT: add w6, w28, w28
; NONEON-NOSVE-NEXT: sxtb w8, w8
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #27]
; NONEON-NOSVE-NEXT: sxtb w6, w6
; NONEON-NOSVE-NEXT: strh w9, [sp, #66]
; NONEON-NOSVE-NEXT: add w9, w26, w26
; NONEON-NOSVE-NEXT: strh w5, [sp, #64]
; NONEON-NOSVE-NEXT: add w5, w25, w25
; NONEON-NOSVE-NEXT: sxtb w9, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
; NONEON-NOSVE-NEXT: sxtb w8, w5
; NONEON-NOSVE-NEXT: ldrb w21, [sp, #24]
; NONEON-NOSVE-NEXT: ldrb w22, [sp, #25]
; NONEON-NOSVE-NEXT: strh w6, [sp, #62]
; NONEON-NOSVE-NEXT: add w6, w24, w24
; NONEON-NOSVE-NEXT: add w5, w23, w23
; NONEON-NOSVE-NEXT: strh w9, [sp, #58]
; NONEON-NOSVE-NEXT: sxtb w9, w6
; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
; NONEON-NOSVE-NEXT: sxtb w8, w5
; NONEON-NOSVE-NEXT: ldrb w4, [sp, #38]
; NONEON-NOSVE-NEXT: ldrb w20, [sp, #39]
; NONEON-NOSVE-NEXT: add w6, w22, w22
; NONEON-NOSVE-NEXT: add w5, w21, w21
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #21]
; NONEON-NOSVE-NEXT: strh w9, [sp, #54]
; NONEON-NOSVE-NEXT: sxtb w9, w6
; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
; NONEON-NOSVE-NEXT: sxtb w8, w5
; NONEON-NOSVE-NEXT: ldrb w2, [sp, #36]
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #37]
; NONEON-NOSVE-NEXT: add w6, w20, w20
; NONEON-NOSVE-NEXT: add w4, w4, w4
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22]
; NONEON-NOSVE-NEXT: add w18, w15, w15
; NONEON-NOSVE-NEXT: strh w9, [sp, #50]
; NONEON-NOSVE-NEXT: sxtb w9, w6
; NONEON-NOSVE-NEXT: sxtb w19, w18
; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
; NONEON-NOSVE-NEXT: sxtb w8, w4
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #34]
; NONEON-NOSVE-NEXT: ldrb w0, [sp, #35]
; NONEON-NOSVE-NEXT: add w3, w3, w3
; NONEON-NOSVE-NEXT: add w2, w2, w2
; NONEON-NOSVE-NEXT: add w16, w16, w16
; NONEON-NOSVE-NEXT: strh w9, [sp, #110]
; NONEON-NOSVE-NEXT: sxtb w9, w3
; NONEON-NOSVE-NEXT: strh w8, [sp, #108]
; NONEON-NOSVE-NEXT: sxtb w8, w2
; NONEON-NOSVE-NEXT: sxtb w7, w16
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #33]
; NONEON-NOSVE-NEXT: add w0, w0, w0
; NONEON-NOSVE-NEXT: add w18, w18, w18
; NONEON-NOSVE-NEXT: strh w9, [sp, #106]
; NONEON-NOSVE-NEXT: sxtb w9, w0
; NONEON-NOSVE-NEXT: strh w8, [sp, #104]
; NONEON-NOSVE-NEXT: sxtb w8, w18
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
; NONEON-NOSVE-NEXT: add w17, w17, w17
; NONEON-NOSVE-NEXT: add w16, w16, w16
; NONEON-NOSVE-NEXT: strh w9, [sp, #102]
; NONEON-NOSVE-NEXT: sxtb w9, w17
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
; NONEON-NOSVE-NEXT: strh w8, [sp, #100]
; NONEON-NOSVE-NEXT: sxtb w8, w16
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
; NONEON-NOSVE-NEXT: add w15, w15, w15
; NONEON-NOSVE-NEXT: add w14, w14, w14
; NONEON-NOSVE-NEXT: strh w9, [sp, #98]
; NONEON-NOSVE-NEXT: sxtb w9, w15
; NONEON-NOSVE-NEXT: strh w8, [sp, #96]
; NONEON-NOSVE-NEXT: sxtb w8, w14
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
; NONEON-NOSVE-NEXT: add w13, w13, w13
; NONEON-NOSVE-NEXT: add w12, w12, w12
; NONEON-NOSVE-NEXT: strh w9, [sp, #94]
; NONEON-NOSVE-NEXT: sxtb w9, w13
; NONEON-NOSVE-NEXT: strh w8, [sp, #92]
; NONEON-NOSVE-NEXT: sxtb w8, w12
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: strh w9, [sp, #90]
; NONEON-NOSVE-NEXT: sxtb w9, w11
; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w8, [sp, #88]
; NONEON-NOSVE-NEXT: sxtb w8, w10
; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #86]
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: strh w8, [sp, #84]
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: sxtb w9, w11
; NONEON-NOSVE-NEXT: strh w7, [sp, #76]
; NONEON-NOSVE-NEXT: sxtb w8, w10
; NONEON-NOSVE-NEXT: strh w19, [sp, #74]
; NONEON-NOSVE-NEXT: strh w9, [sp, #82]
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
; NONEON-NOSVE-NEXT: strh w8, [sp, #80]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #208
; NONEON-NOSVE-NEXT: ret
  %a = load <32 x i8>, ptr %in
  %b = add <32 x i8> %a, %a
  %c = sext <32 x i8> %b to <32 x i16>
  store <32 x i16> %c, ptr %out
  ret void
}
|
|
|
|
;
|
|
; sext i8 -> i32
|
|
;
|
|
|
|
; Sign-extends the <8 x i8> argument %a to <8 x i32> and stores the result
; through %out.
define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
; CHECK-LABEL: sext_v8i8_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i8_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #48
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %b = sext <8 x i8> %a to <8 x i32>
  store <8 x i32> %b, ptr %out
  ret void
}
|
|
|
|
; Sign-extends the <16 x i8> argument %a to <16 x i32> and stores the result
; through %out.
define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
; CHECK-LABEL: sext_v16i8_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: stp q0, q2, [x0]
; CHECK-NEXT: stp q1, q3, [x0, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21]
; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %b = sext <16 x i8> %a to <16 x i32>
  store <16 x i32> %b, ptr %out
  ret void
}
|
|
|
|
; Loads <32 x i8> from %in, adds the value to itself, sign-extends the sum to
; <32 x i32> and stores the result through %out.
; NOTE(review): the self-add is presumably here for the same reason noted on
; sext_v32i8_v32i16 (keeping the extend from being combined with the load).
define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
; CHECK-LABEL: sext_v32i8_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpklo z3.h, z1.b
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-NEXT: sunpklo z2.h, z2.b
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: ext z5.b, z5.b, z3.b, #8
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z4.s, z4.h
; CHECK-NEXT: sunpklo z5.s, z5.h
; CHECK-NEXT: movprfx z6, z2
; CHECK-NEXT: ext z6.b, z6.b, z2.b, #8
; CHECK-NEXT: movprfx z7, z1
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: stp q3, q5, [x1]
; CHECK-NEXT: sunpklo z3.s, z7.h
; CHECK-NEXT: stp q0, q4, [x1, #64]
; CHECK-NEXT: sunpklo z0.s, z6.h
; CHECK-NEXT: stp q1, q3, [x1, #32]
; CHECK-NEXT: stp q2, q0, [x1, #96]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #272
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45]
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #17]
; NONEON-NOSVE-NEXT: add w16, w16, w16
; NONEON-NOSVE-NEXT: add w17, w17, w17
; NONEON-NOSVE-NEXT: ldrb w30, [sp, #23]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: sxtb w6, w17
; NONEON-NOSVE-NEXT: sxtb w7, w16
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
; NONEON-NOSVE-NEXT: add w5, w14, w14
; NONEON-NOSVE-NEXT: add w18, w15, w15
; NONEON-NOSVE-NEXT: sxtb w19, w18
; NONEON-NOSVE-NEXT: ldrb w29, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21]
; NONEON-NOSVE-NEXT: stp w7, w6, [sp, #104]
; NONEON-NOSVE-NEXT: add w6, w30, w30
; NONEON-NOSVE-NEXT: sxtb w5, w5
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldrb w27, [sp, #26]
; NONEON-NOSVE-NEXT: ldrb w28, [sp, #27]
; NONEON-NOSVE-NEXT: sxtb w6, w6
; NONEON-NOSVE-NEXT: sxtb w8, w8
; NONEON-NOSVE-NEXT: stp w5, w19, [sp, #96]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w5, w29, w29
; NONEON-NOSVE-NEXT: ldrb w25, [sp, #24]
; NONEON-NOSVE-NEXT: ldrb w26, [sp, #25]
; NONEON-NOSVE-NEXT: sxtb w9, w9
; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #88]
; NONEON-NOSVE-NEXT: add w6, w28, w28
; NONEON-NOSVE-NEXT: sxtb w5, w5
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: sxtb w6, w6
; NONEON-NOSVE-NEXT: sxtb w8, w8
; NONEON-NOSVE-NEXT: ldrb w23, [sp, #30]
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #31]
; NONEON-NOSVE-NEXT: stp w5, w9, [sp, #80]
; NONEON-NOSVE-NEXT: add w9, w26, w26
; NONEON-NOSVE-NEXT: add w5, w25, w25
; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #72]
; NONEON-NOSVE-NEXT: sxtb w9, w9
; NONEON-NOSVE-NEXT: sxtb w8, w5
; NONEON-NOSVE-NEXT: ldrb w21, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w22, [sp, #29]
; NONEON-NOSVE-NEXT: add w6, w24, w24
; NONEON-NOSVE-NEXT: add w5, w23, w23
; NONEON-NOSVE-NEXT: ldrb w4, [sp, #34]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT: sxtb w9, w6
; NONEON-NOSVE-NEXT: sxtb w8, w5
; NONEON-NOSVE-NEXT: ldrb w20, [sp, #35]
; NONEON-NOSVE-NEXT: add w6, w22, w22
; NONEON-NOSVE-NEXT: add w5, w21, w21
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: sxtb w9, w6
; NONEON-NOSVE-NEXT: sxtb w8, w5
; NONEON-NOSVE-NEXT: ldrb w2, [sp, #32]
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #33]
; NONEON-NOSVE-NEXT: add w6, w20, w20
; NONEON-NOSVE-NEXT: add w4, w4, w4
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT: sxtb w9, w6
; NONEON-NOSVE-NEXT: sxtb w8, w4
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #38]
; NONEON-NOSVE-NEXT: ldrb w0, [sp, #39]
; NONEON-NOSVE-NEXT: add w3, w3, w3
; NONEON-NOSVE-NEXT: add w2, w2, w2
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #36]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168]
; NONEON-NOSVE-NEXT: sxtb w9, w3
; NONEON-NOSVE-NEXT: sxtb w8, w2
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #37]
; NONEON-NOSVE-NEXT: add w0, w0, w0
; NONEON-NOSVE-NEXT: add w18, w18, w18
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
; NONEON-NOSVE-NEXT: sxtb w9, w0
; NONEON-NOSVE-NEXT: sxtb w8, w18
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #42]
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #43]
; NONEON-NOSVE-NEXT: add w17, w17, w17
; NONEON-NOSVE-NEXT: add w16, w16, w16
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
; NONEON-NOSVE-NEXT: sxtb w9, w17
; NONEON-NOSVE-NEXT: sxtb w8, w16
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #40]
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #41]
; NONEON-NOSVE-NEXT: add w15, w15, w15
; NONEON-NOSVE-NEXT: add w14, w14, w14
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
; NONEON-NOSVE-NEXT: sxtb w9, w15
; NONEON-NOSVE-NEXT: sxtb w8, w14
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #47]
; NONEON-NOSVE-NEXT: add w13, w13, w13
; NONEON-NOSVE-NEXT: add w12, w12, w12
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
; NONEON-NOSVE-NEXT: sxtb w9, w13
; NONEON-NOSVE-NEXT: sxtb w8, w12
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
; NONEON-NOSVE-NEXT: sxtb w9, w11
; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: sxtb w8, w10
; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80]
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: sxtb w11, w11
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
; NONEON-NOSVE-NEXT: sxtb w8, w10
; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #48]
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #144]
; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #112]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #112]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #272
; NONEON-NOSVE-NEXT: ret
  %a = load <32 x i8>, ptr %in
  %b = add <32 x i8> %a, %a
  %c = sext <32 x i8> %b to <32 x i32>
  store <32 x i32> %c, ptr %out
  ret void
}
|
|
|
|
;
|
|
; sext i8 -> i64
|
|
;
|
|
|
|
; NOTE: v4i8 is an unpacked type stored within a v4i16 container. The sign
|
|
; extend is a two step process where the container is any_extend'd with the
|
|
; result feeding an inreg sign extend.
|
|
; Sign-extends the <4 x i8> argument %a to <4 x i64> and stores the result
; through %out.
define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
; CHECK-LABEL: sext_v4i8_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
; CHECK-NEXT: sxtb z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v4i8_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldp d0, d1, [sp]
; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #28]
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #24]
; NONEON-NOSVE-NEXT: ldrsb x10, [sp, #16]
; NONEON-NOSVE-NEXT: ldrsb x11, [sp, #20]
; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48]
; NONEON-NOSVE-NEXT: stp x10, x11, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %b = sext <4 x i8> %a to <4 x i64>
  store <4 x i64> %b, ptr %out
  ret void
}
|
|
|
|
; Sign-extends the <8 x i8> argument %a to <8 x i64> and stores the result
; through %out.
define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
; CHECK-LABEL: sext_v8i8_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: sunpklo z2.d, z2.s
; CHECK-NEXT: sunpklo z1.d, z1.s
; CHECK-NEXT: sunpklo z3.d, z3.s
; CHECK-NEXT: stp q0, q2, [x0]
; CHECK-NEXT: stp q1, q3, [x0, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i8_v8i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #80
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #9]
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #8]
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #11]
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #10]
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #13]
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #12]
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #15]
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #14]
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
  %b = sext <8 x i8> %a to <8 x i64>
  store <8 x i64> %b, ptr %out
  ret void
}
|
|
|
|
; Test: sign-extend the <16 x i8> argument %a to <16 x i64> and store it to
; %out. The expected scalable-vector code widens in three sunpklo steps
; (b -> h -> s -> d); each "movprfx + ext ..., #8" pair (or a plain in-place
; ext) presumably brings the upper 8 bytes of a 128-bit value into the low
; half so that they can be unpacked as well.
; The expected code for the run without NEON and SVE goes through the stack
; and sign-extends every byte with a scalar ldrsb.
define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: sext_v16i8_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.h, z0.b
|
|
; CHECK-NEXT: sunpklo z1.h, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z3.s, z1.h
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z2.s, z2.h
|
|
; CHECK-NEXT: movprfx z4, z0
|
|
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: sunpklo z1.s, z1.h
|
|
; CHECK-NEXT: movprfx z5, z3
|
|
; CHECK-NEXT: ext z5.b, z5.b, z3.b, #8
|
|
; CHECK-NEXT: sunpklo z3.d, z3.s
|
|
; CHECK-NEXT: sunpklo z4.d, z4.s
|
|
; CHECK-NEXT: movprfx z6, z2
|
|
; CHECK-NEXT: ext z6.b, z6.b, z2.b, #8
|
|
; CHECK-NEXT: sunpklo z2.d, z2.s
|
|
; CHECK-NEXT: sunpklo z5.d, z5.s
|
|
; CHECK-NEXT: sunpklo z6.d, z6.s
|
|
; CHECK-NEXT: stp q0, q4, [x0]
|
|
; CHECK-NEXT: movprfx z0, z1
|
|
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q3, q5, [x0, #64]
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: stp q2, q6, [x0, #32]
|
|
; CHECK-NEXT: stp q1, q0, [x0, #96]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v16i8_v16i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32]
|
|
; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64]
|
|
; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #160
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%b = sext <16 x i8> %a to <16 x i64>
|
|
store <16 x i64> %b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Test: load <32 x i8> from %in, double it (add %a, %a), sign-extend the
; result to <32 x i64> and store it to %out. The 'add' is presumably there to
; keep the extend from being combined with the load, as the NOTE on
; zext_v32i8_v32i16 in this file states for that test.
; The expected scalable-vector code handles the two loaded q registers with
; chains of sunpklo (b -> h -> s -> d) and "ext ..., #8" steps. The expected
; code for the run without NEON and SVE spills x19-x30 and processes every
; element with scalar ldrb / add / sxtb.
define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: sext_v32i8_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
|
; CHECK-NEXT: add z1.b, z1.b, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.h, z0.b
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z1.h, z1.b
|
|
; CHECK-NEXT: sunpklo z3.h, z3.b
|
|
; CHECK-NEXT: sunpklo z2.h, z2.b
|
|
; CHECK-NEXT: movprfx z4, z0
|
|
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z5.s, z1.h
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: movprfx z7, z3
|
|
; CHECK-NEXT: ext z7.b, z7.b, z3.b, #8
|
|
; CHECK-NEXT: sunpklo z3.s, z3.h
|
|
; CHECK-NEXT: sunpklo z4.s, z4.h
|
|
; CHECK-NEXT: movprfx z16, z0
|
|
; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: sunpklo z1.s, z1.h
|
|
; CHECK-NEXT: sunpklo z6.s, z2.h
|
|
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
|
|
; CHECK-NEXT: movprfx z17, z5
|
|
; CHECK-NEXT: ext z17.b, z17.b, z5.b, #8
|
|
; CHECK-NEXT: sunpklo z7.s, z7.h
|
|
; CHECK-NEXT: sunpklo z5.d, z5.s
|
|
; CHECK-NEXT: sunpklo z16.d, z16.s
|
|
; CHECK-NEXT: movprfx z20, z3
|
|
; CHECK-NEXT: ext z20.b, z20.b, z3.b, #8
|
|
; CHECK-NEXT: sunpklo z19.d, z4.s
|
|
; CHECK-NEXT: sunpklo z2.s, z2.h
|
|
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
|
|
; CHECK-NEXT: sunpklo z3.d, z3.s
|
|
; CHECK-NEXT: sunpklo z17.d, z17.s
|
|
; CHECK-NEXT: sunpklo z18.d, z6.s
|
|
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
|
|
; CHECK-NEXT: str q5, [x1]
|
|
; CHECK-NEXT: stp q0, q16, [x1, #128]
|
|
; CHECK-NEXT: sunpklo z0.d, z1.s
|
|
; CHECK-NEXT: sunpklo z16.d, z20.s
|
|
; CHECK-NEXT: sunpklo z4.d, z4.s
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z6.d, z6.s
|
|
; CHECK-NEXT: stp q17, q0, [x1, #16]
|
|
; CHECK-NEXT: movprfx z0, z2
|
|
; CHECK-NEXT: ext z0.b, z0.b, z2.b, #8
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q3, q16, [x1, #64]
|
|
; CHECK-NEXT: movprfx z3, z7
|
|
; CHECK-NEXT: ext z3.b, z3.b, z7.b, #8
|
|
; CHECK-NEXT: sunpklo z2.d, z2.s
|
|
; CHECK-NEXT: stp q19, q4, [x1, #160]
|
|
; CHECK-NEXT: sunpklo z4.d, z7.s
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: stp q18, q6, [x1, #192]
|
|
; CHECK-NEXT: sunpklo z3.d, z3.s
|
|
; CHECK-NEXT: str q1, [x1, #48]
|
|
; CHECK-NEXT: stp q2, q0, [x1, #224]
|
|
; CHECK-NEXT: stp q4, q3, [x1, #96]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #400
|
|
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #320] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #336] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #352] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #368] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #384] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 400
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47]
|
|
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: add w16, w16, w16
|
|
; NONEON-NOSVE-NEXT: add w17, w17, w17
|
|
; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: sxtb x19, w17
|
|
; NONEON-NOSVE-NEXT: sxtb x20, w16
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: add w7, w14, w14
|
|
; NONEON-NOSVE-NEXT: add w18, w15, w15
|
|
; NONEON-NOSVE-NEXT: sxtb x21, w18
|
|
; NONEON-NOSVE-NEXT: ldrb w29, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #160]
|
|
; NONEON-NOSVE-NEXT: add w19, w30, w30
|
|
; NONEON-NOSVE-NEXT: sxtb x7, w7
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: ldrb w27, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w28, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: sxtb x19, w19
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w8
|
|
; NONEON-NOSVE-NEXT: stp x7, x21, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: add w7, w29, w29
|
|
; NONEON-NOSVE-NEXT: ldrb w25, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrb w26, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w9
|
|
; NONEON-NOSVE-NEXT: stp x8, x19, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: add w19, w28, w28
|
|
; NONEON-NOSVE-NEXT: sxtb x7, w7
|
|
; NONEON-NOSVE-NEXT: add w8, w27, w27
|
|
; NONEON-NOSVE-NEXT: sxtb x19, w19
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w8
|
|
; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: stp x7, x9, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: add w9, w26, w26
|
|
; NONEON-NOSVE-NEXT: add w7, w25, w25
|
|
; NONEON-NOSVE-NEXT: stp x8, x19, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w9
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w7
|
|
; NONEON-NOSVE-NEXT: ldrb w6, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: ldrb w22, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: add w19, w24, w24
|
|
; NONEON-NOSVE-NEXT: add w7, w23, w23
|
|
; NONEON-NOSVE-NEXT: ldrb w4, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w19
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w7
|
|
; NONEON-NOSVE-NEXT: ldrb w5, [sp, #33]
|
|
; NONEON-NOSVE-NEXT: add w19, w22, w22
|
|
; NONEON-NOSVE-NEXT: add w6, w6, w6
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w19
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w6
|
|
; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34]
|
|
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35]
|
|
; NONEON-NOSVE-NEXT: add w5, w5, w5
|
|
; NONEON-NOSVE-NEXT: add w4, w4, w4
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w5
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w4
|
|
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #36]
|
|
; NONEON-NOSVE-NEXT: ldrb w0, [sp, #37]
|
|
; NONEON-NOSVE-NEXT: add w3, w3, w3
|
|
; NONEON-NOSVE-NEXT: add w2, w2, w2
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #38]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w3
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w2
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #39]
|
|
; NONEON-NOSVE-NEXT: add w0, w0, w0
|
|
; NONEON-NOSVE-NEXT: add w18, w18, w18
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w0
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w18
|
|
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #41]
|
|
; NONEON-NOSVE-NEXT: add w17, w17, w17
|
|
; NONEON-NOSVE-NEXT: add w16, w16, w16
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w17
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w16
|
|
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #42]
|
|
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #43]
|
|
; NONEON-NOSVE-NEXT: add w15, w15, w15
|
|
; NONEON-NOSVE-NEXT: add w14, w14, w14
|
|
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #44]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w15
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w14
|
|
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #45]
|
|
; NONEON-NOSVE-NEXT: add w13, w13, w13
|
|
; NONEON-NOSVE-NEXT: add w12, w12, w12
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w13
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w12
|
|
; NONEON-NOSVE-NEXT: add w11, w11, w11
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #208]
|
|
; NONEON-NOSVE-NEXT: sxtb x9, w11
|
|
; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w10
|
|
; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: add w11, w11, w11
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: sxtb x11, w11
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #192]
|
|
; NONEON-NOSVE-NEXT: sxtb x8, w10
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #176]
|
|
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldp q17, q16, [sp, #272]
|
|
; NONEON-NOSVE-NEXT: ldp q18, q21, [sp, #176]
|
|
; NONEON-NOSVE-NEXT: ldp q20, q19, [sp, #240]
|
|
; NONEON-NOSVE-NEXT: ldp q23, q22, [sp, #208]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
|
|
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #384] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #368] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64]
|
|
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #352] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96]
|
|
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #336] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128]
|
|
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #320] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160]
|
|
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192]
|
|
; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #400
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%a = load <32 x i8>, ptr %in
|
|
%b = add <32 x i8> %a, %a
|
|
%c = sext <32 x i8> %b to <32 x i64>
|
|
store <32 x i64> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; sext i16 -> i32
|
|
;
|
|
|
|
; Test: sign-extend the <8 x i16> argument %a to <8 x i32> and store it to
; %out. The expected scalable-vector code copies the input with
; "movprfx + ext ..., #8" (presumably to bring the upper four halfwords into
; the low half) and then applies sunpklo (h -> s) to both registers.
; The expected code for the run without NEON and SVE goes through the stack
; and sign-extends every element with a scalar ldrsh.
define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
|
|
; CHECK-LABEL: sext_v8i16_v8i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z1.s, z1.h
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v8i16_v8i32:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #64
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%b = sext <8 x i16> %a to <8 x i32>
|
|
store <8 x i32>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Test: load <16 x i16> from %in, double it (add %a, %a), sign-extend the
; result to <16 x i32> and store it to %out. The 'add' is presumably there to
; keep the extend from being combined with the load, as the NOTE on
; zext_v32i8_v32i16 in this file states for that test.
; The expected scalable-vector code uses one "movprfx + ext ..., #8" pair per
; loaded q register followed by sunpklo (h -> s). The expected code for the
; run without NEON and SVE processes every element with scalar
; ldrh / add / sxth via the stack.
define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: sext_v16i16_v16i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: add z1.h, z1.h, z1.h
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z1.s, z1.h
|
|
; CHECK-NEXT: sunpklo z2.s, z2.h
|
|
; CHECK-NEXT: sunpklo z3.s, z3.h
|
|
; CHECK-NEXT: stp q1, q3, [x1]
|
|
; CHECK-NEXT: stp q0, q2, [x1, #32]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v16i16_v16i32:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
|
|
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4]
|
|
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #6]
|
|
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2]
|
|
; NONEON-NOSVE-NEXT: ldrh w3, [sp]
|
|
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: ldrh w5, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: add w13, w13, w13
|
|
; NONEON-NOSVE-NEXT: add w16, w16, w16
|
|
; NONEON-NOSVE-NEXT: add w12, w12, w12
|
|
; NONEON-NOSVE-NEXT: sxth w16, w16
|
|
; NONEON-NOSVE-NEXT: sxth w13, w13
|
|
; NONEON-NOSVE-NEXT: add w3, w3, w3
|
|
; NONEON-NOSVE-NEXT: sxth w12, w12
|
|
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: stp w13, w16, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: sxth w13, w3
|
|
; NONEON-NOSVE-NEXT: add w16, w5, w5
|
|
; NONEON-NOSVE-NEXT: add w3, w4, w4
|
|
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: sxth w12, w16
|
|
; NONEON-NOSVE-NEXT: sxth w13, w3
|
|
; NONEON-NOSVE-NEXT: add w16, w2, w2
|
|
; NONEON-NOSVE-NEXT: add w0, w0, w0
|
|
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: sxth w12, w16
|
|
; NONEON-NOSVE-NEXT: sxth w13, w0
|
|
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: add w16, w18, w18
|
|
; NONEON-NOSVE-NEXT: add w17, w17, w17
|
|
; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: sxth w12, w16
|
|
; NONEON-NOSVE-NEXT: sxth w13, w17
|
|
; NONEON-NOSVE-NEXT: add w15, w15, w15
|
|
; NONEON-NOSVE-NEXT: add w14, w14, w14
|
|
; NONEON-NOSVE-NEXT: add w11, w11, w11
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #88]
|
|
; NONEON-NOSVE-NEXT: sxth w12, w15
|
|
; NONEON-NOSVE-NEXT: sxth w13, w14
|
|
; NONEON-NOSVE-NEXT: sxth w11, w11
|
|
; NONEON-NOSVE-NEXT: sxth w10, w10
|
|
; NONEON-NOSVE-NEXT: sxth w9, w9
|
|
; NONEON-NOSVE-NEXT: sxth w8, w8
|
|
; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #96
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%a = load <16 x i16>, ptr %in
|
|
%b = add <16 x i16> %a, %a
|
|
%c = sext <16 x i16> %b to <16 x i32>
|
|
store <16 x i32> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; sext i16 -> i64
|
|
;
|
|
|
|
; Test: sign-extend the <4 x i16> argument %a (passed in d0) to <4 x i64> and
; store it to %out. The expected scalable-vector code first widens h -> s
; with sunpklo, copies the result with "movprfx + ext ..., #8" (presumably to
; bring the upper two words into the low half), then widens both registers
; s -> d.
; The expected code for the run without NEON and SVE goes through the stack
; and sign-extends every element with a scalar ldrsh.
define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
|
|
; CHECK-LABEL: sext_v4i16_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v4i16_v4i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #48
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
|
|
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #48
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%b = sext <4 x i16> %a to <4 x i64>
|
|
store <4 x i64>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Test: sign-extend the <8 x i16> argument %a to <8 x i64> and store it to
; %out. The expected scalable-vector code widens in two sunpklo steps
; (h -> s -> d), with a "movprfx + ext ..., #8" pair before each step that
; needs the upper half of a register.
; The expected code for the run without NEON and SVE goes through the stack
; and sign-extends every element with a scalar ldrsh.
define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
|
|
; CHECK-LABEL: sext_v8i16_v8i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z1.s, z1.h
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: sunpklo z2.d, z2.s
|
|
; CHECK-NEXT: sunpklo z3.d, z3.s
|
|
; CHECK-NEXT: stp q0, q2, [x0]
|
|
; CHECK-NEXT: stp q1, q3, [x0, #32]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v8i16_v8i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #96
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%b = sext <8 x i16> %a to <8 x i64>
|
|
store <8 x i64>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Test: load <16 x i16> from %in, double it (add %a, %a), sign-extend the
; result to <16 x i64> and store it to %out. The 'add' is presumably there to
; keep the extend from being combined with the load, as the NOTE on
; zext_v32i8_v32i16 in this file states for that test.
; The expected scalable-vector code widens each loaded q register in two
; sunpklo steps (h -> s -> d) with "ext ..., #8" steps in between. The
; expected code for the run without NEON and SVE processes every element with
; scalar ldrh / add / sxth via the stack.
define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: sext_v16i16_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: add z1.h, z1.h, z1.h
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z3.s, z1.h
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z2.s, z2.h
|
|
; CHECK-NEXT: movprfx z4, z0
|
|
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z1.s, z1.h
|
|
; CHECK-NEXT: movprfx z5, z3
|
|
; CHECK-NEXT: ext z5.b, z5.b, z3.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: sunpklo z3.d, z3.s
|
|
; CHECK-NEXT: sunpklo z4.d, z4.s
|
|
; CHECK-NEXT: sunpklo z5.d, z5.s
|
|
; CHECK-NEXT: movprfx z6, z2
|
|
; CHECK-NEXT: ext z6.b, z6.b, z2.b, #8
|
|
; CHECK-NEXT: movprfx z7, z1
|
|
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z2.d, z2.s
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q3, q5, [x1]
|
|
; CHECK-NEXT: sunpklo z3.d, z7.s
|
|
; CHECK-NEXT: stp q0, q4, [x1, #64]
|
|
; CHECK-NEXT: sunpklo z0.d, z6.s
|
|
; CHECK-NEXT: stp q1, q3, [x1, #32]
|
|
; CHECK-NEXT: stp q2, q0, [x1, #96]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v16i16_v16i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #160
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
|
|
; NONEON-NOSVE-NEXT: ldrh w13, [sp]
|
|
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2]
|
|
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #6]
|
|
; NONEON-NOSVE-NEXT: ldrh w3, [sp, #4]
|
|
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldrh w5, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: add w13, w13, w13
|
|
; NONEON-NOSVE-NEXT: add w16, w16, w16
|
|
; NONEON-NOSVE-NEXT: add w12, w12, w12
|
|
; NONEON-NOSVE-NEXT: sxth x16, w16
|
|
; NONEON-NOSVE-NEXT: sxth x13, w13
|
|
; NONEON-NOSVE-NEXT: add w3, w3, w3
|
|
; NONEON-NOSVE-NEXT: sxth x12, w12
|
|
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: stp x13, x16, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: sxth x13, w3
|
|
; NONEON-NOSVE-NEXT: add w16, w5, w5
|
|
; NONEON-NOSVE-NEXT: add w3, w4, w4
|
|
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: sxth x12, w16
|
|
; NONEON-NOSVE-NEXT: sxth x13, w3
|
|
; NONEON-NOSVE-NEXT: add w16, w2, w2
|
|
; NONEON-NOSVE-NEXT: add w0, w0, w0
|
|
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: sxth x12, w16
|
|
; NONEON-NOSVE-NEXT: sxth x13, w0
|
|
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: add w16, w18, w18
|
|
; NONEON-NOSVE-NEXT: add w17, w17, w17
|
|
; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: sxth x12, w16
|
|
; NONEON-NOSVE-NEXT: sxth x13, w17
|
|
; NONEON-NOSVE-NEXT: add w15, w15, w15
|
|
; NONEON-NOSVE-NEXT: add w14, w14, w14
|
|
; NONEON-NOSVE-NEXT: add w11, w11, w11
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: sxth x12, w15
|
|
; NONEON-NOSVE-NEXT: sxth x13, w14
|
|
; NONEON-NOSVE-NEXT: sxth x11, w11
|
|
; NONEON-NOSVE-NEXT: sxth x10, w10
|
|
; NONEON-NOSVE-NEXT: sxth x9, w9
|
|
; NONEON-NOSVE-NEXT: sxth x8, w8
|
|
; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp x10, x11, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
|
|
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
|
|
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #160
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%a = load <16 x i16>, ptr %in
|
|
%b = add <16 x i16> %a, %a
|
|
%c = sext <16 x i16> %b to <16 x i64>
|
|
store <16 x i64> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; sext i32 -> i64
|
|
;
|
|
|
|
; Test: sign-extend the <4 x i32> argument %a to <4 x i64> and store it to
; %out. The expected scalable-vector code copies the input with
; "movprfx + ext ..., #8" (presumably to bring the upper two words into the
; low half) and then applies sunpklo (s -> d) to both registers.
; The expected code for the run without NEON and SVE goes through the stack
; and sign-extends two words at a time with ldpsw.
define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
|
|
; CHECK-LABEL: sext_v4i32_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v4i32_v4i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #64
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%b = sext <4 x i32> %a to <4 x i64>
|
|
store <4 x i64>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; Test: load <8 x i32> from %in, double it (add %a, %a), sign-extend the
; result to <8 x i64> and store it to %out. The 'add' is presumably there to
; keep the extend from being combined with the load, as the NOTE on
; zext_v32i8_v32i16 in this file states for that test.
; The expected scalable-vector code uses one "movprfx + ext ..., #8" pair per
; loaded q register followed by sunpklo (s -> d). The expected code for the
; run without NEON and SVE processes every element with scalar
; ldp / add / sxtw via the stack.
define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: sext_v8i32_v8i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: add z1.s, z1.s, z1.s
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: sunpklo z1.d, z1.s
|
|
; CHECK-NEXT: sunpklo z2.d, z2.s
|
|
; CHECK-NEXT: sunpklo z3.d, z3.s
|
|
; CHECK-NEXT: stp q1, q3, [x1]
|
|
; CHECK-NEXT: stp q0, q2, [x1, #32]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: sext_v8i32_v8i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
|
|
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp]
|
|
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: sxtw x8, w8
|
|
; NONEON-NOSVE-NEXT: sxtw x9, w9
|
|
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: add w8, w15, w15
|
|
; NONEON-NOSVE-NEXT: add w9, w14, w14
|
|
; NONEON-NOSVE-NEXT: sxtw x8, w8
|
|
; NONEON-NOSVE-NEXT: sxtw x9, w9
|
|
; NONEON-NOSVE-NEXT: add w11, w11, w11
|
|
; NONEON-NOSVE-NEXT: add w13, w13, w13
|
|
; NONEON-NOSVE-NEXT: add w12, w12, w12
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: sxtw x13, w13
|
|
; NONEON-NOSVE-NEXT: sxtw x12, w12
|
|
; NONEON-NOSVE-NEXT: sxtw x11, w11
|
|
; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: sxtw x8, w10
|
|
; NONEON-NOSVE-NEXT: stp x12, x13, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #96
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%a = load <8 x i32>, ptr %in
|
|
%b = add <8 x i32> %a, %a
|
|
%c = sext <8 x i32> %b to <8 x i64>
|
|
store <8 x i64> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; zext i8 -> i16
|
|
;
|
|
|
|
; Test: zero-extend the <16 x i8> argument %a to <16 x i16> and store it to
; %out. The expected scalable-vector code copies the input with
; "movprfx + ext ..., #8" (presumably to bring the upper eight bytes into the
; low half) and then applies uunpklo (b -> h) to both registers.
; The expected code for the run without NEON and SVE goes through the stack
; and widens every byte with a scalar ldrb / strh pair.
define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: zext_v16i8_v16i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v16i8_v16i16:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #60]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #64
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%b = zext <16 x i8> %a to <16 x i16>
|
|
store <16 x i16>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
; NOTE: Extra 'add' is to prevent the extend being combined with the load.
|
|
define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: zext_v32i8_v32i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
|
; CHECK-NEXT: add z1.b, z1.b, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: uunpklo z2.h, z2.b
|
|
; CHECK-NEXT: uunpklo z3.h, z3.b
|
|
; CHECK-NEXT: stp q1, q3, [x1]
|
|
; CHECK-NEXT: stp q0, q2, [x1, #32]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v32i8_v32i16:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #208
|
|
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: ldrb w28, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w29, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrb w27, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: add w5, w17, w17
|
|
; NONEON-NOSVE-NEXT: ldrb w25, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: ldrb w26, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: and w5, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: strh w5, [sp, #74]
|
|
; NONEON-NOSVE-NEXT: add w5, w29, w29
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
|
|
; NONEON-NOSVE-NEXT: add w8, w28, w28
|
|
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: and w5, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: strh w9, [sp, #68]
|
|
; NONEON-NOSVE-NEXT: add w9, w27, w27
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w23, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: strh w5, [sp, #66]
|
|
; NONEON-NOSVE-NEXT: add w5, w26, w26
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: add w8, w25, w25
|
|
; NONEON-NOSVE-NEXT: ldrb w22, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: strh w9, [sp, #62]
|
|
; NONEON-NOSVE-NEXT: and w9, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: add w5, w24, w24
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w21, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: strh w9, [sp, #60]
|
|
; NONEON-NOSVE-NEXT: add w9, w23, w23
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
|
|
; NONEON-NOSVE-NEXT: and w8, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w20, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: add w5, w22, w22
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w19, [sp, #39]
|
|
; NONEON-NOSVE-NEXT: add w9, w21, w21
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
|
|
; NONEON-NOSVE-NEXT: and w8, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: ldrb w4, [sp, #38]
|
|
; NONEON-NOSVE-NEXT: add w5, w20, w20
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #52]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #37]
|
|
; NONEON-NOSVE-NEXT: add w9, w19, w19
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
|
|
; NONEON-NOSVE-NEXT: and w8, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: add w0, w16, w16
|
|
; NONEON-NOSVE-NEXT: ldrb w2, [sp, #36]
|
|
; NONEON-NOSVE-NEXT: add w4, w4, w4
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: add w18, w15, w15
|
|
; NONEON-NOSVE-NEXT: and w6, w0, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w0, [sp, #35]
|
|
; NONEON-NOSVE-NEXT: add w9, w3, w3
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #110]
|
|
; NONEON-NOSVE-NEXT: and w8, w4, #0xff
|
|
; NONEON-NOSVE-NEXT: and w7, w18, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #34]
|
|
; NONEON-NOSVE-NEXT: add w2, w2, w2
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #108]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #33]
|
|
; NONEON-NOSVE-NEXT: add w9, w0, w0
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #106]
|
|
; NONEON-NOSVE-NEXT: and w8, w2, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: add w18, w18, w18
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #104]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47]
|
|
; NONEON-NOSVE-NEXT: add w9, w17, w17
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #102]
|
|
; NONEON-NOSVE-NEXT: and w8, w18, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46]
|
|
; NONEON-NOSVE-NEXT: add w16, w16, w16
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #100]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45]
|
|
; NONEON-NOSVE-NEXT: add w9, w15, w15
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #98]
|
|
; NONEON-NOSVE-NEXT: and w8, w16, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44]
|
|
; NONEON-NOSVE-NEXT: add w14, w14, w14
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43]
|
|
; NONEON-NOSVE-NEXT: add w9, w13, w13
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #94]
|
|
; NONEON-NOSVE-NEXT: and w8, w14, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42]
|
|
; NONEON-NOSVE-NEXT: add w12, w12, w12
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #92]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: add w9, w11, w11
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #90]
|
|
; NONEON-NOSVE-NEXT: and w8, w12, #0xff
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #88]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: ldrb w30, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #86]
|
|
; NONEON-NOSVE-NEXT: and w8, w10, #0xff
|
|
; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: strh w6, [sp, #78]
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: add w6, w30, w30
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #84]
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: and w6, w6, #0xff
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #82]
|
|
; NONEON-NOSVE-NEXT: and w8, w10, #0xff
|
|
; NONEON-NOSVE-NEXT: strh w7, [sp, #76]
|
|
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: strh w6, [sp, #72]
|
|
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: strh w8, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
|
|
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #208
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: load <32 x i8> from %in, add it to itself, zero-extend every lane
; to i16 and store the <32 x i16> result to %out.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%a = load <32 x i8>, ptr %in
|
|
%b = add <32 x i8> %a, %a
|
|
%c = zext <32 x i8> %b to <32 x i16>
|
|
store <32 x i16> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; zext i8 -> i32
|
|
;
|
|
|
|
define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: zext_v8i8_v8i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v8i8_v8i32:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #48
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
|
|
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #48
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: zero-extend the eight i8 lanes of %a to i32 and store the
; <8 x i32> result to %out.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%b = zext <8 x i8> %a to <8 x i32>
|
|
store <8 x i32>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: zext_v16i8_v16i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: uunpklo z2.s, z2.h
|
|
; CHECK-NEXT: uunpklo z3.s, z3.h
|
|
; CHECK-NEXT: stp q0, q2, [x0]
|
|
; CHECK-NEXT: stp q1, q3, [x0, #32]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v16i8_v16i32:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #96
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: zero-extend the sixteen i8 lanes of %a to i32 and store the
; <16 x i32> result to %out.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%b = zext <16 x i8> %a to <16 x i32>
|
|
store <16 x i32> %b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: zext_v32i8_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
|
; CHECK-NEXT: add z1.b, z1.b, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: uunpklo z3.h, z1.b
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z2.h, z2.b
|
|
; CHECK-NEXT: movprfx z4, z0
|
|
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: movprfx z5, z3
|
|
; CHECK-NEXT: ext z5.b, z5.b, z3.b, #8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z3.s, z3.h
|
|
; CHECK-NEXT: uunpklo z4.s, z4.h
|
|
; CHECK-NEXT: uunpklo z5.s, z5.h
|
|
; CHECK-NEXT: movprfx z6, z2
|
|
; CHECK-NEXT: ext z6.b, z6.b, z2.b, #8
|
|
; CHECK-NEXT: movprfx z7, z1
|
|
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z2.s, z2.h
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: stp q3, q5, [x1]
|
|
; CHECK-NEXT: uunpklo z3.s, z7.h
|
|
; CHECK-NEXT: stp q0, q4, [x1, #64]
|
|
; CHECK-NEXT: uunpklo z0.s, z6.h
|
|
; CHECK-NEXT: stp q1, q3, [x1, #32]
|
|
; CHECK-NEXT: stp q2, q0, [x1, #96]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v32i8_v32i32:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #272
|
|
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45]
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrb w30, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: add w18, w15, w15
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: add w0, w16, w16
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: and w19, w18, #0xff
|
|
; NONEON-NOSVE-NEXT: and w7, w0, #0xff
|
|
; NONEON-NOSVE-NEXT: add w6, w17, w17
|
|
; NONEON-NOSVE-NEXT: ldrb w28, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: ldrb w29, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: stp w19, w7, [sp, #104]
|
|
; NONEON-NOSVE-NEXT: add w7, w30, w30
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: and w6, w6, #0xff
|
|
; NONEON-NOSVE-NEXT: and w7, w7, #0xff
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w26, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: ldrb w27, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: stp w7, w6, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: add w6, w29, w29
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #88]
|
|
; NONEON-NOSVE-NEXT: add w8, w28, w28
|
|
; NONEON-NOSVE-NEXT: and w6, w6, #0xff
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w25, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: add w9, w27, w27
|
|
; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: add w6, w26, w26
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w22, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: and w6, w6, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w23, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: add w8, w25, w25
|
|
; NONEON-NOSVE-NEXT: stp w6, w9, [sp, #72]
|
|
; NONEON-NOSVE-NEXT: add w9, w24, w24
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w20, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: ldrb w21, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: add w6, w23, w23
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: add w8, w22, w22
|
|
; NONEON-NOSVE-NEXT: and w9, w6, #0xff
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w4, [sp, #34]
|
|
; NONEON-NOSVE-NEXT: ldrb w5, [sp, #35]
|
|
; NONEON-NOSVE-NEXT: add w6, w21, w21
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: add w9, w20, w20
|
|
; NONEON-NOSVE-NEXT: and w8, w6, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w2, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #33]
|
|
; NONEON-NOSVE-NEXT: add w5, w5, w5
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: add w8, w4, w4
|
|
; NONEON-NOSVE-NEXT: and w9, w5, #0xff
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #38]
|
|
; NONEON-NOSVE-NEXT: ldrb w0, [sp, #39]
|
|
; NONEON-NOSVE-NEXT: add w3, w3, w3
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168]
|
|
; NONEON-NOSVE-NEXT: add w9, w2, w2
|
|
; NONEON-NOSVE-NEXT: and w8, w3, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #36]
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #37]
|
|
; NONEON-NOSVE-NEXT: add w0, w0, w0
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #160]
|
|
; NONEON-NOSVE-NEXT: add w8, w18, w18
|
|
; NONEON-NOSVE-NEXT: and w9, w0, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #42]
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #43]
|
|
; NONEON-NOSVE-NEXT: add w17, w17, w17
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
|
|
; NONEON-NOSVE-NEXT: add w9, w16, w16
|
|
; NONEON-NOSVE-NEXT: and w8, w17, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #41]
|
|
; NONEON-NOSVE-NEXT: add w15, w15, w15
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: add w8, w14, w14
|
|
; NONEON-NOSVE-NEXT: and w9, w15, #0xff
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46]
|
|
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #47]
|
|
; NONEON-NOSVE-NEXT: add w13, w13, w13
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
|
|
; NONEON-NOSVE-NEXT: add w9, w12, w12
|
|
; NONEON-NOSVE-NEXT: and w8, w13, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: add w11, w11, w11
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: add w8, w10, w10
|
|
; NONEON-NOSVE-NEXT: and w9, w11, #0xff
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldr w10, [sp, #8] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120]
|
|
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: add w10, w10, w10
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: and w8, w10, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
|
|
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
|
|
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
|
|
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #272
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: load <32 x i8> from %in, add it to itself, zero-extend every lane
; to i32 and store the <32 x i32> result to %out.
; NOTE(review): the add presumably serves the same purpose as in
; zext_v32i8_v32i16 (keeping the extend from being combined with the load) -
; confirm.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%a = load <32 x i8>, ptr %in
|
|
%b = add <32 x i8> %a, %a
|
|
%c = zext <32 x i8> %b to <32 x i32>
|
|
store <32 x i32> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; zext i8 -> i64
|
|
;
|
|
|
|
; NOTE: v4i8 is an unpacked type stored within a v4i16 container. The zero
|
|
; extend is a two step process where the container is zero_extend_inreg'd with
|
|
; the result feeding a normal zero extend from halves to doublewords.
|
|
define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: zext_v4i8_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: and z0.h, z0.h, #0xff
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: uunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q0, q1, [x0]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v4i8_v4i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #48
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
|
|
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #48
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: zero-extend the four i8 lanes of %a to i64 and store the
; <4 x i64> result to %out.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%b = zext <4 x i8> %a to <4 x i64>
|
|
store <4 x i64>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: zext_v8i8_v8i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z2.d, z2.s
|
|
; CHECK-NEXT: uunpklo z1.d, z1.s
|
|
; CHECK-NEXT: uunpklo z3.d, z3.s
|
|
; CHECK-NEXT: stp q0, q2, [x0]
|
|
; CHECK-NEXT: stp q1, q3, [x0, #32]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v8i8_v8i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #80
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
|
|
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #80
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: zero-extend the eight i8 lanes of %a to i64 and store the
; <8 x i64> result to %out.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%b = zext <8 x i8> %a to <8 x i64>
|
|
store <8 x i64>%b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
|
|
; CHECK-LABEL: zext_v16i8_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z3.s, z1.h
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z2.s, z2.h
|
|
; CHECK-NEXT: movprfx z4, z0
|
|
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: movprfx z5, z3
|
|
; CHECK-NEXT: ext z5.b, z5.b, z3.b, #8
|
|
; CHECK-NEXT: uunpklo z3.d, z3.s
|
|
; CHECK-NEXT: uunpklo z4.d, z4.s
|
|
; CHECK-NEXT: movprfx z6, z2
|
|
; CHECK-NEXT: ext z6.b, z6.b, z2.b, #8
|
|
; CHECK-NEXT: uunpklo z2.d, z2.s
|
|
; CHECK-NEXT: uunpklo z5.d, z5.s
|
|
; CHECK-NEXT: uunpklo z6.d, z6.s
|
|
; CHECK-NEXT: stp q0, q4, [x0]
|
|
; CHECK-NEXT: movprfx z0, z1
|
|
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q3, q5, [x0, #64]
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: stp q2, q6, [x0, #32]
|
|
; CHECK-NEXT: stp q1, q0, [x0, #96]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v16i8_v16i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: str q0, [sp, #-160]!
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
|
|
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
|
|
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #152]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #136]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #120]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #104]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #96]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #64]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
|
|
; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32]
|
|
; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64]
|
|
; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #160
|
|
; NONEON-NOSVE-NEXT: ret
|
|
; Test body: zero-extend the sixteen i8 lanes of %a to i64 and store the
; <16 x i64> result to %out.
; The assertions above are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
%b = zext <16 x i8> %a to <16 x i64>
|
|
store <16 x i64> %b, ptr %out
|
|
ret void
|
|
}
|
|
|
|
define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
|
|
; CHECK-LABEL: zext_v32i8_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldp q1, q0, [x0]
|
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
|
; CHECK-NEXT: add z1.b, z1.b, z1.b
|
|
; CHECK-NEXT: movprfx z2, z0
|
|
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.h, z0.b
|
|
; CHECK-NEXT: movprfx z3, z1
|
|
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: uunpklo z3.h, z3.b
|
|
; CHECK-NEXT: uunpklo z2.h, z2.b
|
|
; CHECK-NEXT: movprfx z4, z0
|
|
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z5.s, z1.h
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: movprfx z7, z3
|
|
; CHECK-NEXT: ext z7.b, z7.b, z3.b, #8
|
|
; CHECK-NEXT: uunpklo z3.s, z3.h
|
|
; CHECK-NEXT: uunpklo z4.s, z4.h
|
|
; CHECK-NEXT: movprfx z16, z0
|
|
; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: uunpklo z6.s, z2.h
|
|
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
|
|
; CHECK-NEXT: movprfx z17, z5
|
|
; CHECK-NEXT: ext z17.b, z17.b, z5.b, #8
|
|
; CHECK-NEXT: uunpklo z7.s, z7.h
|
|
; CHECK-NEXT: uunpklo z5.d, z5.s
|
|
; CHECK-NEXT: uunpklo z16.d, z16.s
|
|
; CHECK-NEXT: movprfx z20, z3
|
|
; CHECK-NEXT: ext z20.b, z20.b, z3.b, #8
|
|
; CHECK-NEXT: uunpklo z19.d, z4.s
|
|
; CHECK-NEXT: uunpklo z2.s, z2.h
|
|
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
|
|
; CHECK-NEXT: uunpklo z3.d, z3.s
|
|
; CHECK-NEXT: uunpklo z17.d, z17.s
|
|
; CHECK-NEXT: uunpklo z18.d, z6.s
|
|
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
|
|
; CHECK-NEXT: str q5, [x1]
|
|
; CHECK-NEXT: stp q0, q16, [x1, #128]
|
|
; CHECK-NEXT: uunpklo z0.d, z1.s
|
|
; CHECK-NEXT: uunpklo z16.d, z20.s
|
|
; CHECK-NEXT: uunpklo z4.d, z4.s
|
|
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
|
|
; CHECK-NEXT: uunpklo z6.d, z6.s
|
|
; CHECK-NEXT: stp q17, q0, [x1, #16]
|
|
; CHECK-NEXT: movprfx z0, z2
|
|
; CHECK-NEXT: ext z0.b, z0.b, z2.b, #8
|
|
; CHECK-NEXT: uunpklo z1.d, z1.s
|
|
; CHECK-NEXT: stp q3, q16, [x1, #64]
|
|
; CHECK-NEXT: movprfx z3, z7
|
|
; CHECK-NEXT: ext z3.b, z3.b, z7.b, #8
|
|
; CHECK-NEXT: uunpklo z2.d, z2.s
|
|
; CHECK-NEXT: stp q19, q4, [x1, #160]
|
|
; CHECK-NEXT: uunpklo z4.d, z7.s
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: stp q18, q6, [x1, #192]
|
|
; CHECK-NEXT: uunpklo z3.d, z3.s
|
|
; CHECK-NEXT: str q1, [x1, #48]
|
|
; CHECK-NEXT: stp q2, q0, [x1, #224]
|
|
; CHECK-NEXT: stp q4, q3, [x1, #96]
|
|
; CHECK-NEXT: ret
|
|
;
|
|
; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:
|
|
; NONEON-NOSVE: // %bb.0:
|
|
; NONEON-NOSVE-NEXT: sub sp, sp, #400
|
|
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #320] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #336] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #352] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #368] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #384] // 16-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 400
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
|
|
; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #172]
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #292]
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #300]
|
|
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47]
|
|
; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21]
|
|
; NONEON-NOSVE-NEXT: ldrb w29, [sp, #20]
|
|
; NONEON-NOSVE-NEXT: ldrb w27, [sp, #22]
|
|
; NONEON-NOSVE-NEXT: ldrb w28, [sp, #23]
|
|
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
|
|
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
|
|
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19]
|
|
; NONEON-NOSVE-NEXT: ldrb w26, [sp, #25]
|
|
; NONEON-NOSVE-NEXT: ldrb w25, [sp, #24]
|
|
; NONEON-NOSVE-NEXT: ldrb w24, [sp, #26]
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: ldrb w21, [sp, #27]
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w19, [sp, #29]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #140]
|
|
; NONEON-NOSVE-NEXT: add w8, w30, w30
|
|
; NONEON-NOSVE-NEXT: ldrb w20, [sp, #28]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #148]
|
|
; NONEON-NOSVE-NEXT: add w9, w29, w29
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #132]
|
|
; NONEON-NOSVE-NEXT: add w8, w27, w27
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #124]
|
|
; NONEON-NOSVE-NEXT: add w9, w28, w28
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #108]
|
|
; NONEON-NOSVE-NEXT: add w8, w26, w26
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #116]
|
|
; NONEON-NOSVE-NEXT: add w9, w25, w25
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #100]
|
|
; NONEON-NOSVE-NEXT: add w8, w24, w24
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #92]
|
|
; NONEON-NOSVE-NEXT: add w9, w21, w21
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w7, [sp, #30]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #76]
|
|
; NONEON-NOSVE-NEXT: add w8, w19, w19
|
|
; NONEON-NOSVE-NEXT: ldrb w6, [sp, #31]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #84]
|
|
; NONEON-NOSVE-NEXT: add w9, w20, w20
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w4, [sp, #33]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #68]
|
|
; NONEON-NOSVE-NEXT: add w8, w7, w7
|
|
; NONEON-NOSVE-NEXT: ldrb w5, [sp, #32]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #60]
|
|
; NONEON-NOSVE-NEXT: add w9, w6, w6
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #16]
|
|
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #17]
|
|
; NONEON-NOSVE-NEXT: ldrb w3, [sp, #34]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: str w8, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: add w8, w4, w4
|
|
; NONEON-NOSVE-NEXT: ldrb w2, [sp, #35]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #52]
|
|
; NONEON-NOSVE-NEXT: add w9, w5, w5
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: add w0, w16, w16
|
|
; NONEON-NOSVE-NEXT: add w22, w18, w18
|
|
; NONEON-NOSVE-NEXT: ldrb w18, [sp, #37]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: str w8, [sp, #296]
|
|
; NONEON-NOSVE-NEXT: add w8, w3, w3
|
|
; NONEON-NOSVE-NEXT: and w23, w0, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w0, [sp, #36]
|
|
; NONEON-NOSVE-NEXT: str w9, [sp, #288]
|
|
; NONEON-NOSVE-NEXT: add w9, w2, w2
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w17, [sp, #38]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: str w8, [sp, #272]
|
|
; NONEON-NOSVE-NEXT: add w8, w18, w18
|
|
; NONEON-NOSVE-NEXT: ldrb w16, [sp, #39]
|
|
; NONEON-NOSVE-NEXT: str w9, [sp, #280]
|
|
; NONEON-NOSVE-NEXT: add w9, w0, w0
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w15, [sp, #41]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: str w8, [sp, #264]
|
|
; NONEON-NOSVE-NEXT: add w8, w17, w17
|
|
; NONEON-NOSVE-NEXT: ldrb w14, [sp, #40]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #252]
|
|
; NONEON-NOSVE-NEXT: add w9, w16, w16
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w12, [sp, #42]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #236]
|
|
; NONEON-NOSVE-NEXT: add w8, w15, w15
|
|
; NONEON-NOSVE-NEXT: ldrb w13, [sp, #43]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244]
|
|
; NONEON-NOSVE-NEXT: add w9, w14, w14
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #45]
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #228]
|
|
; NONEON-NOSVE-NEXT: add w8, w12, w12
|
|
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #44]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #220]
|
|
; NONEON-NOSVE-NEXT: add w9, w13, w13
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #204]
|
|
; NONEON-NOSVE-NEXT: add w8, w11, w11
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #212]
|
|
; NONEON-NOSVE-NEXT: add w9, w10, w10
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: and w9, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #196]
|
|
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #188]
|
|
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: and w22, w22, #0xff
|
|
; NONEON-NOSVE-NEXT: add w8, w8, w8
|
|
; NONEON-NOSVE-NEXT: stp wzr, w22, [sp, #164]
|
|
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
|
|
; NONEON-NOSVE-NEXT: add w9, w9, w9
|
|
; NONEON-NOSVE-NEXT: stp wzr, w23, [sp, #156]
|
|
; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #180]
|
|
; NONEON-NOSVE-NEXT: and w8, w9, #0xff
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #276]
|
|
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144]
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #284]
|
|
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112]
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #260]
|
|
; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #80]
|
|
; NONEON-NOSVE-NEXT: str wzr, [sp, #268]
|
|
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #48]
|
|
; NONEON-NOSVE-NEXT: str w8, [sp, #176]
|
|
; NONEON-NOSVE-NEXT: ldp q17, q16, [sp, #272]
|
|
; NONEON-NOSVE-NEXT: ldp q18, q21, [sp, #176]
|
|
; NONEON-NOSVE-NEXT: ldp q20, q19, [sp, #240]
|
|
; NONEON-NOSVE-NEXT: ldp q23, q22, [sp, #208]
|
|
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
|
|
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #384] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32]
|
|
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #368] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64]
|
|
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #352] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96]
|
|
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #336] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128]
|
|
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #320] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160]
|
|
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload
|
|
; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192]
|
|
; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224]
|
|
; NONEON-NOSVE-NEXT: add sp, sp, #400
|
|
; NONEON-NOSVE-NEXT: ret
|
|
%a = load <32 x i8>, ptr %in
|
|
%b = add <32 x i8> %a, %a
|
|
%c = zext <32 x i8> %b to <32 x i64>
|
|
store <32 x i64> %c, ptr %out
|
|
ret void
|
|
}
|
|
|
|
;
; zext i16 -> i32
;

; Zero-extend a <8 x i16> register argument to <8 x i32> and store it to %out.
; With SVE/SVE2 the high half is extracted with a constructive EXT
; (movprfx + ext) and both halves are widened with uunpklo; without NEON or
; SVE the extension is done element by element through the stack.
define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
; CHECK-LABEL: zext_v8i16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i16_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %b = zext <8 x i16> %a to <8 x i32>
  store <8 x i32> %b, ptr %out
  ret void
}

; Load <16 x i16> from %in, double it (add %a, %a), zero-extend the result to
; <16 x i32> and store it to %out. With SVE/SVE2 each 128-bit half has its
; upper part extracted via movprfx + ext before the uunpklo widening.
define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
; CHECK-LABEL: zext_v16i16_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: stp q1, q3, [x1]
; CHECK-NEXT: stp q0, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i16_v16i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6]
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #4]
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2]
; NONEON-NOSVE-NEXT: ldrh w3, [sp]
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #12]
; NONEON-NOSVE-NEXT: ldrh w5, [sp, #14]
; NONEON-NOSVE-NEXT: add w14, w14, w14
; NONEON-NOSVE-NEXT: add w17, w17, w17
; NONEON-NOSVE-NEXT: add w16, w16, w16
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: and w17, w17, #0xffff
; NONEON-NOSVE-NEXT: and w16, w16, #0xffff
; NONEON-NOSVE-NEXT: stp w17, w14, [sp, #56]
; NONEON-NOSVE-NEXT: add w14, w3, w3
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #8]
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #10]
; NONEON-NOSVE-NEXT: add w17, w5, w5
; NONEON-NOSVE-NEXT: stp w14, w16, [sp, #48]
; NONEON-NOSVE-NEXT: add w16, w4, w4
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #18]
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #22]
; NONEON-NOSVE-NEXT: and w14, w17, #0xffff
; NONEON-NOSVE-NEXT: and w16, w16, #0xffff
; NONEON-NOSVE-NEXT: add w17, w2, w2
; NONEON-NOSVE-NEXT: stp w16, w14, [sp, #40]
; NONEON-NOSVE-NEXT: add w14, w0, w0
; NONEON-NOSVE-NEXT: and w16, w17, #0xffff
; NONEON-NOSVE-NEXT: add w17, w18, w18
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: add w15, w15, w15
; NONEON-NOSVE-NEXT: add w13, w13, w13
; NONEON-NOSVE-NEXT: add w12, w12, w12
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: stp w14, w16, [sp, #32]
; NONEON-NOSVE-NEXT: and w14, w17, #0xffff
; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
; NONEON-NOSVE-NEXT: and w13, w13, #0xffff
; NONEON-NOSVE-NEXT: and w12, w12, #0xffff
; NONEON-NOSVE-NEXT: and w11, w11, #0xffff
; NONEON-NOSVE-NEXT: and w10, w10, #0xffff
; NONEON-NOSVE-NEXT: and w9, w9, #0xffff
; NONEON-NOSVE-NEXT: and w8, w8, #0xffff
; NONEON-NOSVE-NEXT: stp w15, w14, [sp, #88]
; NONEON-NOSVE-NEXT: stp w12, w13, [sp, #80]
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %a = load <16 x i16>, ptr %in
  %b = add <16 x i16> %a, %a
  %c = zext <16 x i16> %b to <16 x i32>
  store <16 x i32> %c, ptr %out
  ret void
}

;
; zext i16 -> i64
;

; Zero-extend a <4 x i16> register argument to <4 x i64> and store it to %out.
; With SVE/SVE2 the input is first widened to 32-bit lanes, then split with
; movprfx + ext and widened again to 64-bit lanes.
define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
; CHECK-LABEL: zext_v4i16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v4i16_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #48
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %b = zext <4 x i16> %a to <4 x i64>
  store <4 x i64> %b, ptr %out
  ret void
}

; Zero-extend a <8 x i16> register argument to <8 x i64> and store it to %out.
; With SVE/SVE2 this takes two rounds of splitting (movprfx + ext) and
; widening (uunpklo): 16-bit to 32-bit lanes, then 32-bit to 64-bit lanes.
define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
; CHECK-LABEL: zext_v8i16_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uunpklo z3.d, z3.s
; CHECK-NEXT: stp q0, q2, [x0]
; CHECK-NEXT: stp q1, q3, [x0, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i16_v8i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-96]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %b = zext <8 x i16> %a to <8 x i64>
  store <8 x i64> %b, ptr %out
  ret void
}

; Load <16 x i16> from %in, double it (add %a, %a), zero-extend the result to
; <16 x i64> and store it to %out. In the SVE/SVE2 output most high-half
; extractions use movprfx + ext, but one stays a plain in-place
; "ext z1.b, z1.b, z1.b, #8" with no movprfx in front of it.
define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
; CHECK-LABEL: zext_v16i16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z3.s, z1.h
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: ext z5.b, z5.b, z3.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z3.d, z3.s
; CHECK-NEXT: uunpklo z4.d, z4.s
; CHECK-NEXT: uunpklo z5.d, z5.s
; CHECK-NEXT: movprfx z6, z2
; CHECK-NEXT: ext z6.b, z6.b, z2.b, #8
; CHECK-NEXT: movprfx z7, z1
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: stp q3, q5, [x1]
; CHECK-NEXT: uunpklo z3.d, z7.s
; CHECK-NEXT: stp q0, q4, [x1, #64]
; CHECK-NEXT: uunpklo z0.d, z6.s
; CHECK-NEXT: stp q1, q3, [x1, #32]
; CHECK-NEXT: stp q2, q0, [x1, #96]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i16_v16i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #160
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: str wzr, [sp, #92]
; NONEON-NOSVE-NEXT: str wzr, [sp, #156]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
; NONEON-NOSVE-NEXT: ldrh w14, [sp]
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #2]
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #4]
; NONEON-NOSVE-NEXT: ldrh w5, [sp, #6]
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #8]
; NONEON-NOSVE-NEXT: ldrh w3, [sp, #10]
; NONEON-NOSVE-NEXT: add w14, w14, w14
; NONEON-NOSVE-NEXT: add w15, w15, w15
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #12]
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #14]
; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #84]
; NONEON-NOSVE-NEXT: add w15, w4, w4
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #76]
; NONEON-NOSVE-NEXT: add w14, w5, w5
; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #60]
; NONEON-NOSVE-NEXT: add w15, w3, w3
; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #68]
; NONEON-NOSVE-NEXT: add w14, w2, w2
; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #18]
; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #52]
; NONEON-NOSVE-NEXT: add w15, w18, w18
; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #44]
; NONEON-NOSVE-NEXT: add w14, w0, w0
; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: add w13, w13, w13
; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #36]
; NONEON-NOSVE-NEXT: add w14, w16, w16
; NONEON-NOSVE-NEXT: add w12, w12, w12
; NONEON-NOSVE-NEXT: str w15, [sp, #32]
; NONEON-NOSVE-NEXT: add w15, w17, w17
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: and w14, w14, #0xffff
; NONEON-NOSVE-NEXT: and w15, w15, #0xffff
; NONEON-NOSVE-NEXT: and w13, w13, #0xffff
; NONEON-NOSVE-NEXT: and w12, w12, #0xffff
; NONEON-NOSVE-NEXT: and w10, w10, #0xffff
; NONEON-NOSVE-NEXT: and w11, w11, #0xffff
; NONEON-NOSVE-NEXT: and w9, w9, #0xffff
; NONEON-NOSVE-NEXT: and w8, w8, #0xffff
; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #148]
; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #140]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp wzr, w12, [sp, #132]
; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #32]
; NONEON-NOSVE-NEXT: stp wzr, w13, [sp, #124]
; NONEON-NOSVE-NEXT: stp wzr, w11, [sp, #116]
; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128]
; NONEON-NOSVE-NEXT: stp wzr, w10, [sp, #108]
; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #100]
; NONEON-NOSVE-NEXT: str w8, [sp, #96]
; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96]
; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
  %a = load <16 x i16>, ptr %in
  %b = add <16 x i16> %a, %a
  %c = zext <16 x i16> %b to <16 x i64>
  store <16 x i64> %c, ptr %out
  ret void
}

;
; zext i32 -> i64
;

; Zero-extend a <4 x i32> register argument to <4 x i64> and store it to %out.
; With SVE/SVE2 the high half is extracted with movprfx + ext and both halves
; are widened with uunpklo.
define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
; CHECK-LABEL: zext_v4i32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v4i32_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %b = zext <4 x i32> %a to <4 x i64>
  store <4 x i64> %b, ptr %out
  ret void
}

; Load <8 x i32> from %in, double it (add %a, %a), zero-extend the result to
; <8 x i64> and store it to %out. With SVE/SVE2 each 128-bit half has its
; upper part extracted via movprfx + ext before the uunpklo widening.
define void @zext_v8i32_v8i64(ptr %in, ptr %out) {
; CHECK-LABEL: zext_v8i32_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: add z0.s, z0.s, z0.s
; CHECK-NEXT: add z1.s, z1.s, z1.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uunpklo z3.d, z3.s
; CHECK-NEXT: stp q1, q3, [x1]
; CHECK-NEXT: stp q0, q2, [x1, #32]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i32_v8i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp]
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8]
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #16]
; NONEON-NOSVE-NEXT: add w10, w10, w10
; NONEON-NOSVE-NEXT: add w11, w11, w11
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #48]
; NONEON-NOSVE-NEXT: add w10, w15, w15
; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #40]
; NONEON-NOSVE-NEXT: add w10, w14, w14
; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #32]
; NONEON-NOSVE-NEXT: add w10, w13, w13
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #88]
; NONEON-NOSVE-NEXT: add w10, w12, w12
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: stp w11, wzr, [sp, #56]
; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #80]
; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32]
; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72]
; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q2, q3, [x1]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %a = load <8 x i32>, ptr %in
  %b = add <8 x i32> %a, %a
  %c = zext <8 x i32> %b to <8 x i64>
  store <8 x i64> %c, ptr %out
  ret void
}

; Splat the i32 argument %0 into <2 x i32>, zero-extend it to <2 x i64>,
; multiply by %1 and store the product to %2 (align 2). The expected output is
; checked separately for the +sve and +sve2/+sme run lines: the former uses a
; predicated mul (ptrue + mul p0/m), the latter an unpredicated mul.
define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
; SVE-LABEL: extend_and_mul:
; SVE: // %bb.0:
; SVE-NEXT: mov z1.s, w0
; SVE-NEXT: ptrue p0.d, vl2
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: uunpklo z1.d, z1.s
; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
; SVE-NEXT: str q0, [x1]
; SVE-NEXT: ret
;
; SVE2-LABEL: extend_and_mul:
; SVE2: // %bb.0:
; SVE2-NEXT: mov z1.s, w0
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT: uunpklo z1.d, z1.s
; SVE2-NEXT: mul z0.d, z1.d, z0.d
; SVE2-NEXT: str q0, [x1]
; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: extend_and_mul:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: mov w9, w0
; NONEON-NOSVE-NEXT: mul x10, x9, x8
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: mul x8, x9, x8
; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: str q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0
  %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer
  %4 = zext <2 x i32> %broadcast.splat3 to <2 x i64>
  %5 = mul <2 x i64> %4, %1
  store <2 x i64> %5, ptr %2, align 2
  ret void
}

; Splat the i32 argument %0 into <2 x i32>, zero-extend it to <2 x i64> and
; store it to %2 (align 2) with no multiply; the <2 x i64> argument %1 is
; unused. With SVE/SVE2 the splat and extend become a scalar
; "mov w8, w0" followed by a 64-bit-lane "mov z0.d, x8".
define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) {
; CHECK-LABEL: extend_no_mul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: extend_no_mul:
; NONEON-NOSVE: // %bb.0: // %entry
; NONEON-NOSVE-NEXT: mov w8, w0
; NONEON-NOSVE-NEXT: stp x8, x8, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr q0, [sp]
; NONEON-NOSVE-NEXT: str q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
entry:
  %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0
  %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer
  %3 = zext <2 x i32> %broadcast.splat3 to <2 x i64>
  store <2 x i64> %3, ptr %2, align 2
  ret void
}