llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
Sander de Smalen 61510b51c3 Revert "[AArch64] Enable subreg liveness tracking by default."
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7.

Some issues were discovered with the bootstrap builds, which
seem like they were caused by this commit. I'm reverting to investigate.
2024-12-12 17:22:15 +00:00

3493 lines
151 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Summary of the three llc configurations exercised by this file
;   1. -mattr=+sve2 together with -force-streaming-compatible, matched against
;      the default CHECK prefix.
;   2. -mattr=+sme together with -force-streaming, matched against the same
;      default prefix, so both of these runs must emit identical code.
;   3. -force-streaming-compatible with no -mattr at all, matched against the
;      NONEON-NOSVE prefix (presumably the no-vector-unit fallback, judging by
;      the prefix name and the per-lane scalar code it expects).
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
;
; truncate i16 -> i8
;
; Truncates a <16 x i16> vector loaded from %in to <16 x i8> and returns it
; by value (no store, so no extra 'add' is needed here).
;
; Default-prefix expectation - both halves are loaded with one ldp, each half
; is narrowed with uzp1 on byte lanes, and the two narrowed halves are joined
; by a splice governed by a vl8 byte predicate.
;
; NONEON-NOSVE expectation - both halves are copied to a 48-byte stack area,
; each of the 16 lanes is moved with an ldrh/strb pair, and the packed result
; is reloaded into q0 from [sp, #32].
define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i16_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z3.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z2.b, z1.b, z1.b
; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
; NONEON-NOSVE-NEXT: ldrh w8, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
; IR under test - a plain load, truncate and return.
%a = load <16 x i16>, ptr %in
%b = trunc <16 x i16> %a to <16 x i8>
ret <16 x i8> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
;
; Truncates a <32 x i16> vector loaded from %in to <32 x i8>, adds the result
; to itself and stores it to %out.
;
; Default-prefix expectation - four q registers are loaded with two ldp, each
; is narrowed with uzp1 on byte lanes, pairs are joined by two splice
; instructions under a vl8 byte predicate, both results are doubled with a
; vector add, and a single stp writes them to x1.
;
; NONEON-NOSVE expectation - a 208-byte frame is set up, x19 through x30 are
; saved, the input is copied to the stack, and every lane is handled with a
; scalar ldrh / add / strb sequence before the packed bytes are reloaded as
; two q registers and stored to x1.
define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i16_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b
; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT: add z1.b, z2.b, z2.b
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #208
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #16]
; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w25, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w26, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #64]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldrh w29, [sp, #52]
; NONEON-NOSVE-NEXT: ldrh w27, [sp, #48]
; NONEON-NOSVE-NEXT: ldrh w28, [sp, #50]
; NONEON-NOSVE-NEXT: ldrh w23, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54]
; NONEON-NOSVE-NEXT: ldrh w24, [sp, #26]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w21, [sp, #20]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldrh w22, [sp, #22]
; NONEON-NOSVE-NEXT: strb w8, [sp, #92]
; NONEON-NOSVE-NEXT: add w8, w29, w29
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #44]
; NONEON-NOSVE-NEXT: strb w9, [sp, #91]
; NONEON-NOSVE-NEXT: add w9, w28, w28
; NONEON-NOSVE-NEXT: ldrh w7, [sp, #46]
; NONEON-NOSVE-NEXT: strb w8, [sp, #90]
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #40]
; NONEON-NOSVE-NEXT: strb w9, [sp, #89]
; NONEON-NOSVE-NEXT: add w9, w26, w26
; NONEON-NOSVE-NEXT: ldrh w3, [sp, #42]
; NONEON-NOSVE-NEXT: strb w8, [sp, #88]
; NONEON-NOSVE-NEXT: add w8, w25, w25
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #36]
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w19, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w20, [sp, #18]
; NONEON-NOSVE-NEXT: strb w9, [sp, #87]
; NONEON-NOSVE-NEXT: add w9, w24, w24
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #38]
; NONEON-NOSVE-NEXT: strb w8, [sp, #86]
; NONEON-NOSVE-NEXT: add w8, w23, w23
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #60]
; NONEON-NOSVE-NEXT: strb w9, [sp, #85]
; NONEON-NOSVE-NEXT: add w9, w22, w22
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #62]
; NONEON-NOSVE-NEXT: add w6, w12, w12
; NONEON-NOSVE-NEXT: strb w8, [sp, #84]
; NONEON-NOSVE-NEXT: add w8, w21, w21
; NONEON-NOSVE-NEXT: add w5, w13, w13
; NONEON-NOSVE-NEXT: strb w9, [sp, #83]
; NONEON-NOSVE-NEXT: add w9, w20, w20
; NONEON-NOSVE-NEXT: strb w8, [sp, #82]
; NONEON-NOSVE-NEXT: add w8, w19, w19
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #32]
; NONEON-NOSVE-NEXT: strb w9, [sp, #81]
; NONEON-NOSVE-NEXT: add w9, w7, w7
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #34]
; NONEON-NOSVE-NEXT: strb w8, [sp, #80]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #76]
; NONEON-NOSVE-NEXT: strb w9, [sp, #111]
; NONEON-NOSVE-NEXT: add w9, w3, w3
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #78]
; NONEON-NOSVE-NEXT: strb w8, [sp, #110]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #72]
; NONEON-NOSVE-NEXT: strb w9, [sp, #109]
; NONEON-NOSVE-NEXT: add w9, w0, w0
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #74]
; NONEON-NOSVE-NEXT: strb w8, [sp, #108]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #68]
; NONEON-NOSVE-NEXT: strb w9, [sp, #107]
; NONEON-NOSVE-NEXT: add w9, w17, w17
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #70]
; NONEON-NOSVE-NEXT: strb w8, [sp, #106]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: ldrh w30, [sp, #58]
; NONEON-NOSVE-NEXT: strb w9, [sp, #105]
; NONEON-NOSVE-NEXT: add w9, w15, w15
; NONEON-NOSVE-NEXT: strb w8, [sp, #104]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strb w9, [sp, #103]
; NONEON-NOSVE-NEXT: add w9, w13, w13
; NONEON-NOSVE-NEXT: strb w8, [sp, #102]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strb w9, [sp, #101]
; NONEON-NOSVE-NEXT: add w9, w11, w11
; NONEON-NOSVE-NEXT: strb w8, [sp, #100]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strb w9, [sp, #99]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #98]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w5, [sp, #95]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w6, [sp, #94]
; NONEON-NOSVE-NEXT: strb w5, [sp, #93]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w9, [sp, #97]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #96]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #208
; NONEON-NOSVE-NEXT: ret
; IR under test - %c doubles the truncated value so that the truncate stays
; separate from the store, as explained by the NOTE above the define.
%a = load <32 x i16>, ptr %in
%b = trunc <32 x i16> %a to <32 x i8>
%c = add <32 x i8> %b, %b
store <32 x i8> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
;
; Truncates a <64 x i16> vector loaded from %in to <64 x i8>, adds the result
; to itself and stores it to %out.
;
; Default-prefix expectation - eight q registers are loaded with four ldp,
; each is narrowed with uzp1 on byte lanes, pairs are joined by four splice
; instructions under a vl8 byte predicate, the four results are doubled with
; vector adds, and two stp instructions write them to x1.
;
; NONEON-NOSVE expectation - a 448-byte frame is set up, x19 through x30 are
; saved, the output pointer in x1 is kept in a stack slot at [sp, #152] and
; reloaded into x8 for the final stores, and every lane goes through a scalar
; ldrh / add / strb sequence, with many lanes parked in 4-byte stack slots in
; between.
define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i16_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: ldp q2, q3, [x0, #96]
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b
; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT: splice z0.b, p0, { z6.b, z7.b }
; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b }
; CHECK-NEXT: splice z2.b, p0, { z2.b, z3.b }
; CHECK-NEXT: splice z3.b, p0, { z4.b, z5.b }
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: add z2.b, z2.b, z2.b
; CHECK-NEXT: add z3.b, z3.b, z3.b
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v64i16_v64i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #448
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #416] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #432] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #400] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #224]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #238]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #256]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #232]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #272]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #160]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #230]
; NONEON-NOSVE-NEXT: add w21, w8, w8
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #274]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #192]
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #228]
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #226]
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #224]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #276]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #278]
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #270]
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #268]
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #266]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #280]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #282]
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #264]
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #262]
; NONEON-NOSVE-NEXT: ldrh w1, [sp, #260]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #284]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #286]
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #258]
; NONEON-NOSVE-NEXT: ldrh w3, [sp, #256]
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #254]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #208]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #210]
; NONEON-NOSVE-NEXT: ldrh w5, [sp, #252]
; NONEON-NOSVE-NEXT: ldrh w6, [sp, #250]
; NONEON-NOSVE-NEXT: ldrh w7, [sp, #248]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #212]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #214]
; NONEON-NOSVE-NEXT: ldrh w19, [sp, #246]
; NONEON-NOSVE-NEXT: ldrh w20, [sp, #244]
; NONEON-NOSVE-NEXT: ldrh w22, [sp, #242]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #216]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #218]
; NONEON-NOSVE-NEXT: ldrh w23, [sp, #240]
; NONEON-NOSVE-NEXT: ldrh w24, [sp, #174]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #384] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #220]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #222]
; NONEON-NOSVE-NEXT: ldrh w25, [sp, #172]
; NONEON-NOSVE-NEXT: ldrh w26, [sp, #170]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #368] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #176]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #178]
; NONEON-NOSVE-NEXT: ldrh w27, [sp, #168]
; NONEON-NOSVE-NEXT: ldrh w28, [sp, #166]
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #352] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #180]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #182]
; NONEON-NOSVE-NEXT: ldrh w29, [sp, #164]
; NONEON-NOSVE-NEXT: ldrh w30, [sp, #162]
; NONEON-NOSVE-NEXT: strb w21, [sp, #335]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #184]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #186]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #188]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #190]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #192]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #194]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #196]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #198]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #200]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #202]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #204]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #206]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #160]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #236]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #234]
; NONEON-NOSVE-NEXT: strb w9, [sp, #334]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #333]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strb w8, [sp, #332]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strb w8, [sp, #331]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strb w8, [sp, #330]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strb w8, [sp, #329]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strb w8, [sp, #328]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strb w8, [sp, #327]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: strb w8, [sp, #326]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strb w8, [sp, #325]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: strb w8, [sp, #324]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strb w8, [sp, #323]
; NONEON-NOSVE-NEXT: add w8, w1, w1
; NONEON-NOSVE-NEXT: strb w8, [sp, #322]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: strb w8, [sp, #321]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strb w8, [sp, #320]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: strb w8, [sp, #319]
; NONEON-NOSVE-NEXT: add w8, w5, w5
; NONEON-NOSVE-NEXT: strb w8, [sp, #318]
; NONEON-NOSVE-NEXT: add w8, w6, w6
; NONEON-NOSVE-NEXT: strb w8, [sp, #317]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strb w8, [sp, #316]
; NONEON-NOSVE-NEXT: add w8, w19, w19
; NONEON-NOSVE-NEXT: strb w8, [sp, #315]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: strb w8, [sp, #314]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: strb w8, [sp, #313]
; NONEON-NOSVE-NEXT: add w8, w23, w23
; NONEON-NOSVE-NEXT: strb w8, [sp, #312]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strb w8, [sp, #311]
; NONEON-NOSVE-NEXT: add w8, w25, w25
; NONEON-NOSVE-NEXT: strb w8, [sp, #310]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strb w8, [sp, #309]
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: strb w8, [sp, #308]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strb w8, [sp, #307]
; NONEON-NOSVE-NEXT: add w8, w29, w29
; NONEON-NOSVE-NEXT: strb w8, [sp, #306]
; NONEON-NOSVE-NEXT: add w8, w30, w30
; NONEON-NOSVE-NEXT: strb w8, [sp, #305]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #432] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #416] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #304]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #400] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #303]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #302]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #301]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #300]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #299]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #298]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #297]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #296]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #295]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #294]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #293]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #292]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #291]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #290]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #289]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #288]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #288]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #351]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #350]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #349]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #348]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #347]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #346]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #345]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #344]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #343]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #342]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #341]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #340]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #339]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #338]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #337]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #336]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #320]
; NONEON-NOSVE-NEXT: stp q3, q2, [x8]
; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #448
; NONEON-NOSVE-NEXT: ret
; IR under test - %c doubles the truncated value so that the truncate stays
; separate from the store, as explained by the NOTE above the define.
%a = load <64 x i16>, ptr %in
%b = trunc <64 x i16> %a to <64 x i8>
%c = add <64 x i8> %b, %b
store <64 x i8> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v128i16_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b
; CHECK-NEXT: ldp q3, q18, [x0, #224]
; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b
; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: uzp1 z21.b, z18.b, z18.b
; CHECK-NEXT: ldp q18, q22, [x0, #160]
; CHECK-NEXT: uzp1 z20.b, z3.b, z3.b
; CHECK-NEXT: uzp1 z24.b, z19.b, z19.b
; CHECK-NEXT: ldp q3, q19, [x0, #96]
; CHECK-NEXT: uzp1 z23.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z26.b, z22.b, z22.b
; CHECK-NEXT: splice z2.b, p0, { z16.b, z17.b }
; CHECK-NEXT: uzp1 z17.b, z7.b, z7.b
; CHECK-NEXT: uzp1 z25.b, z18.b, z18.b
; CHECK-NEXT: splice z7.b, p0, { z20.b, z21.b }
; CHECK-NEXT: uzp1 z21.b, z5.b, z5.b
; CHECK-NEXT: uzp1 z19.b, z19.b, z19.b
; CHECK-NEXT: uzp1 z20.b, z4.b, z4.b
; CHECK-NEXT: uzp1 z5.b, z1.b, z1.b
; CHECK-NEXT: uzp1 z16.b, z6.b, z6.b
; CHECK-NEXT: splice z6.b, p0, { z23.b, z24.b }
; CHECK-NEXT: uzp1 z18.b, z3.b, z3.b
; CHECK-NEXT: splice z3.b, p0, { z25.b, z26.b }
; CHECK-NEXT: uzp1 z4.b, z0.b, z0.b
; CHECK-NEXT: add z0.b, z2.b, z2.b
; CHECK-NEXT: add z7.b, z7.b, z7.b
; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b }
; CHECK-NEXT: splice z2.b, p0, { z18.b, z19.b }
; CHECK-NEXT: splice z16.b, p0, { z20.b, z21.b }
; CHECK-NEXT: splice z4.b, p0, { z4.b, z5.b }
; CHECK-NEXT: add z6.b, z6.b, z6.b
; CHECK-NEXT: add z3.b, z3.b, z3.b
; CHECK-NEXT: stp q0, q7, [x1, #96]
; CHECK-NEXT: add z0.b, z1.b, z1.b
; CHECK-NEXT: add z1.b, z2.b, z2.b
; CHECK-NEXT: add z2.b, z16.b, z16.b
; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.b, z4.b, z4.b
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: sub sp, sp, #800
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: str x1, [sp, #408] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96]
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192]
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160]
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128]
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224]
; NONEON-NOSVE-NEXT: str q0, [sp, #592]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #606]
; NONEON-NOSVE-NEXT: str q19, [sp, #496]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #600]
; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #512]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #598]
; NONEON-NOSVE-NEXT: ldrh w12, [sp, #596]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #432]
; NONEON-NOSVE-NEXT: ldrh w13, [sp, #594]
; NONEON-NOSVE-NEXT: str w8, [sp, #64] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #432]
; NONEON-NOSVE-NEXT: ldrh w14, [sp, #592]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #464]
; NONEON-NOSVE-NEXT: ldr w30, [sp, #64] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: str w8, [sp, #404] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #434]
; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #560]
; NONEON-NOSVE-NEXT: str w8, [sp, #400] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #436]
; NONEON-NOSVE-NEXT: str q5, [sp, #544]
; NONEON-NOSVE-NEXT: str w8, [sp, #396] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #438]
; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #608]
; NONEON-NOSVE-NEXT: str w8, [sp, #392] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #440]
; NONEON-NOSVE-NEXT: ldrh w15, [sp, #638]
; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #640]
; NONEON-NOSVE-NEXT: ldrh w16, [sp, #636]
; NONEON-NOSVE-NEXT: ldrh w17, [sp, #634]
; NONEON-NOSVE-NEXT: str w8, [sp, #388] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #442]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #666]
; NONEON-NOSVE-NEXT: str q3, [sp, #416]
; NONEON-NOSVE-NEXT: ldrh w18, [sp, #632]
; NONEON-NOSVE-NEXT: ldrh w0, [sp, #630]
; NONEON-NOSVE-NEXT: str w8, [sp, #384] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #444]
; NONEON-NOSVE-NEXT: ldrh w1, [sp, #628]
; NONEON-NOSVE-NEXT: ldrh w2, [sp, #626]
; NONEON-NOSVE-NEXT: ldrh w3, [sp, #624]
; NONEON-NOSVE-NEXT: ldrh w4, [sp, #622]
; NONEON-NOSVE-NEXT: str w8, [sp, #380] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #446]
; NONEON-NOSVE-NEXT: ldrh w5, [sp, #620]
; NONEON-NOSVE-NEXT: ldrh w6, [sp, #618]
; NONEON-NOSVE-NEXT: ldrh w7, [sp, #616]
; NONEON-NOSVE-NEXT: ldrh w19, [sp, #614]
; NONEON-NOSVE-NEXT: str w8, [sp, #376] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #480]
; NONEON-NOSVE-NEXT: ldrh w20, [sp, #612]
; NONEON-NOSVE-NEXT: ldrh w21, [sp, #610]
; NONEON-NOSVE-NEXT: ldrh w22, [sp, #608]
; NONEON-NOSVE-NEXT: ldrh w23, [sp, #430]
; NONEON-NOSVE-NEXT: str w8, [sp, #372] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #482]
; NONEON-NOSVE-NEXT: ldrh w24, [sp, #428]
; NONEON-NOSVE-NEXT: ldrh w25, [sp, #426]
; NONEON-NOSVE-NEXT: ldrh w26, [sp, #424]
; NONEON-NOSVE-NEXT: ldrh w27, [sp, #422]
; NONEON-NOSVE-NEXT: str w8, [sp, #368] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #484]
; NONEON-NOSVE-NEXT: ldrh w28, [sp, #420]
; NONEON-NOSVE-NEXT: ldrh w29, [sp, #418]
; NONEON-NOSVE-NEXT: strb w30, [sp, #767]
; NONEON-NOSVE-NEXT: str w8, [sp, #364] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #486]
; NONEON-NOSVE-NEXT: str w8, [sp, #360] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #488]
; NONEON-NOSVE-NEXT: str w8, [sp, #356] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #490]
; NONEON-NOSVE-NEXT: str w8, [sp, #352] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #492]
; NONEON-NOSVE-NEXT: str w8, [sp, #348] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #494]
; NONEON-NOSVE-NEXT: str w8, [sp, #344] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #448]
; NONEON-NOSVE-NEXT: str w8, [sp, #340] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #450]
; NONEON-NOSVE-NEXT: str w8, [sp, #336] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #452]
; NONEON-NOSVE-NEXT: str w8, [sp, #332] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #454]
; NONEON-NOSVE-NEXT: str w8, [sp, #328] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #456]
; NONEON-NOSVE-NEXT: str w8, [sp, #324] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #458]
; NONEON-NOSVE-NEXT: str w8, [sp, #320] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #460]
; NONEON-NOSVE-NEXT: str w8, [sp, #316] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #462]
; NONEON-NOSVE-NEXT: str w8, [sp, #312] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #464]
; NONEON-NOSVE-NEXT: str w8, [sp, #308] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #466]
; NONEON-NOSVE-NEXT: str w8, [sp, #304] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #468]
; NONEON-NOSVE-NEXT: str w8, [sp, #300] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #470]
; NONEON-NOSVE-NEXT: str w8, [sp, #296] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #472]
; NONEON-NOSVE-NEXT: str w8, [sp, #292] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #474]
; NONEON-NOSVE-NEXT: str w8, [sp, #288] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #476]
; NONEON-NOSVE-NEXT: str w8, [sp, #284] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #478]
; NONEON-NOSVE-NEXT: str w8, [sp, #280] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #656]
; NONEON-NOSVE-NEXT: str w8, [sp, #276] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #658]
; NONEON-NOSVE-NEXT: str w8, [sp, #272] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #660]
; NONEON-NOSVE-NEXT: str w8, [sp, #268] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #662]
; NONEON-NOSVE-NEXT: str w8, [sp, #264] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #664]
; NONEON-NOSVE-NEXT: str w8, [sp, #260] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #668]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #252] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #670]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #528]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #244] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #530]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #532]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #236] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #534]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #536]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #228] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #538]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #540]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #220] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #542]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #496]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #212] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #498]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #500]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #502]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #504]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #506]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #508]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #188] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #510]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #512]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #180] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #514]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #516]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #518]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #520]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #164] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #522]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #524]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #156] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #526]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #640]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #148] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #642]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #644]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #140] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #646]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #648]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #650]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #652]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #124] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #654]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #576]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #116] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #578]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #580]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #108] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #582]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #584]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #586]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #588]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #92] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #590]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #544]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #84] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #546]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #548]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #76] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #550]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #552]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #68] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #554]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #556]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #558]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #560]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #562]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #564]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #566]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #568]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #570]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #572]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #574]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #416]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #602]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #604]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #765]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strb w8, [sp, #764]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strb w8, [sp, #763]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strb w8, [sp, #762]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strb w8, [sp, #761]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strb w8, [sp, #760]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strb w8, [sp, #759]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: strb w8, [sp, #758]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strb w8, [sp, #757]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: strb w8, [sp, #756]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strb w8, [sp, #755]
; NONEON-NOSVE-NEXT: add w8, w1, w1
; NONEON-NOSVE-NEXT: strb w8, [sp, #754]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: strb w8, [sp, #753]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strb w8, [sp, #752]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: strb w8, [sp, #751]
; NONEON-NOSVE-NEXT: add w8, w5, w5
; NONEON-NOSVE-NEXT: strb w8, [sp, #750]
; NONEON-NOSVE-NEXT: add w8, w6, w6
; NONEON-NOSVE-NEXT: strb w8, [sp, #749]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strb w8, [sp, #748]
; NONEON-NOSVE-NEXT: add w8, w19, w19
; NONEON-NOSVE-NEXT: strb w8, [sp, #747]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: strb w8, [sp, #746]
; NONEON-NOSVE-NEXT: add w8, w21, w21
; NONEON-NOSVE-NEXT: strb w8, [sp, #745]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: strb w8, [sp, #744]
; NONEON-NOSVE-NEXT: add w8, w23, w23
; NONEON-NOSVE-NEXT: strb w8, [sp, #743]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strb w8, [sp, #742]
; NONEON-NOSVE-NEXT: add w8, w25, w25
; NONEON-NOSVE-NEXT: strb w8, [sp, #741]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strb w8, [sp, #740]
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: strb w8, [sp, #739]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strb w8, [sp, #738]
; NONEON-NOSVE-NEXT: add w8, w29, w29
; NONEON-NOSVE-NEXT: strb w8, [sp, #737]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w9, [sp, #766]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #736]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #736]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #735]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #734]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #733]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #732]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #731]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #730]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #729]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #728]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #727]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #726]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #725]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #724]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #723]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #722]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #721]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #720]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #783]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #782]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #781]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #780]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #779]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #778]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #777]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #776]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #775]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #774]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #773]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #772]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #771]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #770]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #769]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #768]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #719]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #156] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #718]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #717]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #164] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #716]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #715]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #172] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #714]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #713]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #180] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #712]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #711]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #188] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #710]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #709]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #196] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #708]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #200] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #707]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #204] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #706]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #705]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #212] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #704]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #704]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #799]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #220] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #798]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #797]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #228] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #796]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #232] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #795]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #236] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #794]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #793]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #244] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #792]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #248] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #791]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #252] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #790]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #789]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #788]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #787]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #786]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #785]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #784]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #280] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #768]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #687]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #284] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #686]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #685]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #292] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #684]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #683]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #682]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #304] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #681]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #680]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #312] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #679]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #678]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #320] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #677]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #324] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #676]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #675]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #674]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #336] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #673]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #340] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #672]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #344] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #703]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #348] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #702]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #701]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #356] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #700]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #699]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #698]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #697]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #696]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #695]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #694]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #693]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #692]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #691]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #690]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #689]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #688]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #408] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #672]
; NONEON-NOSVE-NEXT: stp q1, q0, [x8]
; NONEON-NOSVE-NEXT: stp q4, q3, [x8, #32]
; NONEON-NOSVE-NEXT: stp q7, q6, [x8, #64]
; NONEON-NOSVE-NEXT: stp q2, q5, [x8, #96]
; NONEON-NOSVE-NEXT: add sp, sp, #800
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ret
%a = load <128 x i16>, ptr %in
%b = trunc <128 x i16> %a to <128 x i8>
%c = add <128 x i8> %b, %b
store <128 x i8> %c, ptr %out
ret void
}
;
; truncate i32 -> i8
;
; Test: load a <8 x i32> vector from %in, truncate every lane to i8 and return
; the <8 x i8> result directly (no store is involved).
;
; Lines with the CHECK prefix hold the expected output of the +sve2
; streaming-compatible and +sme streaming RUN lines. There the narrowing is
; done with uzp1 on .h elements for each 128-bit half, a splice that joins the
; two halves, and a final uzp1 on .b elements.
; Lines with the NONEON-NOSVE prefix hold the expected output of the RUN line
; that enables no SVE/SME feature. There the input is copied to the stack and
; one byte per 32-bit lane is stored with strb before the result is reloaded
; into d0.
;
; All assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i32_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h
; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: strb w9, [sp, #47]
; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: strb w9, [sp, #45]
; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w9, [sp, #43]
; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp]
; NONEON-NOSVE-NEXT: strb w9, [sp, #41]
; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  ; IR under test - wide load, lane-wise i32 to i8 narrowing, return the result.
  %a = load <8 x i32>, ptr %in
  %b = trunc <8 x i32> %a to <8 x i8>
  ret <8 x i8> %b
}
; Test: load a <16 x i32> vector from %in, truncate every lane to i8 and return
; the <16 x i8> result directly (no store is involved).
;
; Lines with the CHECK prefix hold the expected output of the +sve2
; streaming-compatible and +sme streaming RUN lines. The four 128-bit input
; quarters are narrowed with uzp1 on .h elements and joined pairwise with
; splice; the two intermediate vectors are then narrowed with uzp1 on .b
; elements and joined by a final splice under a vl8 byte predicate.
; Lines with the NONEON-NOSVE prefix hold the expected output of the RUN line
; that enables no SVE/SME feature. There the input is copied into an 80-byte
; stack area and one byte per 32-bit lane is stored with strb at offsets 64-79
; before the result is reloaded into q0.
;
; All assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i32_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #80
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: str q1, [sp, #48]
; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: strb w9, [sp, #77]
; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT: strb w9, [sp, #75]
; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT: strb w9, [sp, #73]
; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: strb w9, [sp, #71]
; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32]
; NONEON-NOSVE-NEXT: strb w9, [sp, #69]
; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: strb w9, [sp, #67]
; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp]
; NONEON-NOSVE-NEXT: strb w9, [sp, #65]
; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
  ; IR under test - wide load, lane-wise i32 to i8 narrowing, return the result.
  %a = load <16 x i32>, ptr %in
  %b = trunc <16 x i32> %a to <16 x i8>
  ret <16 x i8> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Test: load a <32 x i32> vector from %in, truncate every lane to i8, add the
; truncated vector to itself and store the <32 x i8> sum to %out.
;
; Lines with the CHECK prefix hold the expected output of the +sve2
; streaming-compatible and +sme streaming RUN lines. The eight 128-bit input
; pieces are narrowed with uzp1 on .h elements and joined pairwise with
; splice, narrowed again with uzp1 on .b elements and joined with splice, and
; the two resulting vectors are doubled with a vector add before the stp to x1.
; Lines with the NONEON-NOSVE prefix hold the expected output of the RUN line
; that enables no SVE/SME feature. There the input is copied to the stack, each
; 32-bit lane is doubled with a scalar add and written with strb, and x19-x30
; are spilled on entry and reloaded before returning.
;
; All assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z16.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z19.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h
; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h }
; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b
; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b }
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i32_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #272
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #80]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #112]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #16]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
; NONEON-NOSVE-NEXT: ldp w27, w28, [sp, #112]
; NONEON-NOSVE-NEXT: ldp w25, w26, [sp, #104]
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w10, w8, [sp, #128]
; NONEON-NOSVE-NEXT: ldp w23, w24, [sp, #96]
; NONEON-NOSVE-NEXT: ldp w21, w22, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #120]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #48]
; NONEON-NOSVE-NEXT: ldp w19, w20, [sp, #16]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: strb w8, [sp, #155]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strb w9, [sp, #154]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: strb w8, [sp, #153]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strb w9, [sp, #152]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: ldp w4, w7, [sp, #56]
; NONEON-NOSVE-NEXT: strb w8, [sp, #151]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strb w9, [sp, #150]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #48]
; NONEON-NOSVE-NEXT: strb w8, [sp, #149]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: strb w9, [sp, #148]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldp w18, w0, [sp, #40]
; NONEON-NOSVE-NEXT: strb w8, [sp, #147]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: strb w9, [sp, #146]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #32]
; NONEON-NOSVE-NEXT: strb w8, [sp, #145]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strb w9, [sp, #144]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #72]
; NONEON-NOSVE-NEXT: strb w8, [sp, #175]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strb w9, [sp, #174]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #64]
; NONEON-NOSVE-NEXT: strb w8, [sp, #173]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strb w9, [sp, #172]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #136]
; NONEON-NOSVE-NEXT: strb w8, [sp, #171]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strb w9, [sp, #170]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strb w8, [sp, #169]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strb w9, [sp, #168]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strb w8, [sp, #167]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strb w9, [sp, #166]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: ldp w29, w30, [sp, #80]
; NONEON-NOSVE-NEXT: strb w8, [sp, #165]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strb w9, [sp, #164]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strb w8, [sp, #163]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w9, [sp, #162]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w5, [sp, #159]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w6, [sp, #158]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strb w5, [sp, #157]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w6, [sp, #156]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #161]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w9, [sp, #160]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144]
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #272
; NONEON-NOSVE-NEXT: ret
  ; IR under test - wide load, lane-wise i32 to i8 narrowing, then b + b so the
  ; stored value is not the raw truncate result, and finally the store to %out.
  %a = load <32 x i32>, ptr %in
  %b = trunc <32 x i32> %a to <32 x i8>
  %c = add <32 x i8> %b, %b
  store <32 x i8> %c, ptr %out
  ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i32_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #160]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q4, q5, [x0, #96]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT: ldp q3, q18, [x0, #128]
; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
; CHECK-NEXT: ldp q2, q19, [x0, #192]
; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
; CHECK-NEXT: ldp q18, q22, [x0, #224]
; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h
; CHECK-NEXT: ldp q3, q23, [x0, #32]
; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z27.h, z19.h, z19.h
; CHECK-NEXT: uzp1 z25.h, z22.h, z22.h
; CHECK-NEXT: uzp1 z26.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z24.h, z18.h, z18.h
; CHECK-NEXT: uzp1 z18.h, z23.h, z23.h
; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
; CHECK-NEXT: splice z1.h, p0, { z20.h, z21.h }
; CHECK-NEXT: splice z6.h, p0, { z24.h, z25.h }
; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z26.h, z27.h }
; CHECK-NEXT: splice z7.h, p0, { z17.h, z18.h }
; CHECK-NEXT: uzp1 z17.b, z16.b, z16.b
; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z3.h, p0, { z22.h, z23.h }
; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z16.b, z1.b, z1.b
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z1.b, z7.b, z7.b
; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
; CHECK-NEXT: splice z7.b, p0, { z16.b, z17.b }
; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
; CHECK-NEXT: splice z4.b, p0, { z5.b, z6.b }
; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b }
; CHECK-NEXT: add z2.b, z7.b, z7.b
; CHECK-NEXT: add z3.b, z4.b, z4.b
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: stp q2, q3, [x1, #32]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: sub sp, sp, #480
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96]
; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128]
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0]
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224]
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192]
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #288]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #316]
; NONEON-NOSVE-NEXT: str q18, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w10, [sp, #304]
; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #176]
; NONEON-NOSVE-NEXT: ldr w11, [sp, #296]
; NONEON-NOSVE-NEXT: ldr w12, [sp, #292]
; NONEON-NOSVE-NEXT: add w20, w8, w8
; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w13, [sp, #288]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w22, [sp, #312]
; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #384]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #400]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #404]
; NONEON-NOSVE-NEXT: str q7, [sp, #160]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #320]
; NONEON-NOSVE-NEXT: ldr w18, [sp, #396]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #392]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #408]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #412]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #332]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #328]
; NONEON-NOSVE-NEXT: ldr w16, [sp, #324]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #272]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #276]
; NONEON-NOSVE-NEXT: ldr w17, [sp, #320]
; NONEON-NOSVE-NEXT: ldr w1, [sp, #388]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #384]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #280]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #284]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #348]
; NONEON-NOSVE-NEXT: ldr w4, [sp, #344]
; NONEON-NOSVE-NEXT: ldr w5, [sp, #340]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w6, [sp, #336]
; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #352]
; NONEON-NOSVE-NEXT: ldr w7, [sp, #380]
; NONEON-NOSVE-NEXT: ldr w19, [sp, #376]
; NONEON-NOSVE-NEXT: ldr w21, [sp, #372]
; NONEON-NOSVE-NEXT: ldr w23, [sp, #368]
; NONEON-NOSVE-NEXT: ldr w24, [sp, #364]
; NONEON-NOSVE-NEXT: ldr w25, [sp, #360]
; NONEON-NOSVE-NEXT: ldr w26, [sp, #356]
; NONEON-NOSVE-NEXT: ldr w27, [sp, #352]
; NONEON-NOSVE-NEXT: strb w20, [sp, #463]
; NONEON-NOSVE-NEXT: add w20, w22, w22
; NONEON-NOSVE-NEXT: strb w20, [sp, #462]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #240]
; NONEON-NOSVE-NEXT: ldp w29, w28, [sp, #168]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #248]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #260]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #264]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #268]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #176]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #184]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #224]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #232]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #192]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #200]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #208]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #216]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #300]
; NONEON-NOSVE-NEXT: ldp w8, w30, [sp, #160]
; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w8, [sp, #308]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #461]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strb w8, [sp, #460]
; NONEON-NOSVE-NEXT: add w8, w9, w9
; NONEON-NOSVE-NEXT: strb w8, [sp, #459]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strb w8, [sp, #458]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strb w8, [sp, #457]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strb w8, [sp, #456]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strb w8, [sp, #455]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strb w8, [sp, #454]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: strb w8, [sp, #453]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strb w8, [sp, #452]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: strb w8, [sp, #451]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strb w8, [sp, #450]
; NONEON-NOSVE-NEXT: add w8, w1, w1
; NONEON-NOSVE-NEXT: strb w8, [sp, #449]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: strb w8, [sp, #448]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strb w8, [sp, #447]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: strb w8, [sp, #446]
; NONEON-NOSVE-NEXT: add w8, w5, w5
; NONEON-NOSVE-NEXT: strb w8, [sp, #445]
; NONEON-NOSVE-NEXT: add w8, w6, w6
; NONEON-NOSVE-NEXT: strb w8, [sp, #444]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strb w8, [sp, #443]
; NONEON-NOSVE-NEXT: add w8, w19, w19
; NONEON-NOSVE-NEXT: strb w8, [sp, #442]
; NONEON-NOSVE-NEXT: add w8, w21, w21
; NONEON-NOSVE-NEXT: strb w8, [sp, #441]
; NONEON-NOSVE-NEXT: add w8, w23, w23
; NONEON-NOSVE-NEXT: strb w8, [sp, #440]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strb w8, [sp, #439]
; NONEON-NOSVE-NEXT: add w8, w25, w25
; NONEON-NOSVE-NEXT: strb w8, [sp, #438]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strb w8, [sp, #437]
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: strb w8, [sp, #436]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strb w8, [sp, #435]
; NONEON-NOSVE-NEXT: add w8, w29, w29
; NONEON-NOSVE-NEXT: strb w8, [sp, #434]
; NONEON-NOSVE-NEXT: add w8, w30, w30
; NONEON-NOSVE-NEXT: strb w8, [sp, #433]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #432]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #431]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #430]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #429]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #428]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #427]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #426]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #425]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #424]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #423]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #422]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #421]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #420]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #419]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #418]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #417]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #416]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #416]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #479]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #478]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #477]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #476]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #475]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #474]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #473]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #472]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #471]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #470]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #469]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #468]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #467]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #466]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #465]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #464]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #448]
; NONEON-NOSVE-NEXT: stp q3, q2, [x8]
; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #480
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ret
;
; Test input: load a <64 x i32> vector from %in, truncate every lane to i8,
; double the narrowed vector and store the <64 x i8> result to %out.
; The self-add exists only to stop the truncate from being merged into the
; store, so the truncate lowering itself is what gets exercised.
; The expected-output lines above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand.
%a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i8>
%c = add <64 x i8> %b, %b
store <64 x i8> %c, ptr %out
ret void
}
;
; truncate i32 -> i16
;
define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h
; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: strh w9, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w9, [sp, #38]
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp]
; NONEON-NOSVE-NEXT: strh w9, [sp, #34]
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
;
; Test input: load a <8 x i32> vector from %in, truncate every lane to i16 and
; return the <8 x i16> result by value (no store, so no extra add is needed).
; In the first group of expected output each 128-bit half is narrowed with
; uzp1 and the halves are joined with splice; in the second group the lanes
; are narrowed one at a time through the stack.
; The expected-output lines above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand.
%a = load <8 x i32>, ptr %in
%b = trunc <8 x i32> %a to <8 x i16>
ret <8 x i16> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i32_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: add z1.h, z2.h, z2.h
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp]
; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #32]
; NONEON-NOSVE-NEXT: ldp w4, w5, [sp, #8]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldp w18, w0, [sp]
; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #24]
; NONEON-NOSVE-NEXT: strh w8, [sp, #78]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strh w9, [sp, #76]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #74]
; NONEON-NOSVE-NEXT: add w8, w5, w5
; NONEON-NOSVE-NEXT: strh w9, [sp, #72]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #56]
; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strh w9, [sp, #68]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #48]
; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strh w9, [sp, #64]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strh w8, [sp, #94]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strh w9, [sp, #92]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strh w8, [sp, #90]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strh w9, [sp, #88]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: strh w8, [sp, #86]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strh w9, [sp, #84]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strh w8, [sp, #82]
; NONEON-NOSVE-NEXT: strh w9, [sp, #80]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
;
; Test input: load a <16 x i32> vector from %in, truncate every lane to i16,
; double the narrowed vector and store the <16 x i16> result to %out.
; The self-add exists only to stop the truncate from being merged into the
; store, so the truncate lowering itself is what gets exercised.
; The expected-output lines above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand.
%a = load <16 x i32>, ptr %in
%b = trunc <16 x i32> %a to <16 x i16>
%c = add <16 x i16> %b, %b
store <16 x i16> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q2, q3, [x0, #96]
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z6.h, z7.h }
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h }
; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: add z2.h, z2.h, z2.h
; CHECK-NEXT: add z3.h, z3.h, z3.h
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i32_v32i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #304
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #80]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #112]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #16]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
; NONEON-NOSVE-NEXT: ldp w27, w28, [sp, #112]
; NONEON-NOSVE-NEXT: ldp w25, w26, [sp, #104]
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w10, w8, [sp, #128]
; NONEON-NOSVE-NEXT: ldp w23, w24, [sp, #96]
; NONEON-NOSVE-NEXT: ldp w21, w22, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #120]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #48]
; NONEON-NOSVE-NEXT: ldp w19, w20, [sp, #16]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: strh w8, [sp, #182]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strh w9, [sp, #180]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: strh w8, [sp, #178]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strh w9, [sp, #176]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: ldp w4, w7, [sp, #56]
; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strh w9, [sp, #172]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #48]
; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: strh w9, [sp, #168]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldp w18, w0, [sp, #40]
; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: strh w9, [sp, #164]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #32]
; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strh w9, [sp, #160]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #72]
; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strh w9, [sp, #156]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #64]
; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strh w9, [sp, #152]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #136]
; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strh w9, [sp, #148]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strh w9, [sp, #144]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strh w8, [sp, #206]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strh w9, [sp, #204]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: ldp w29, w30, [sp, #80]
; NONEON-NOSVE-NEXT: strh w8, [sp, #202]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strh w9, [sp, #200]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strh w8, [sp, #198]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #196]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w5, [sp, #190]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w6, [sp, #188]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strh w5, [sp, #186]
; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #144]
; NONEON-NOSVE-NEXT: strh w6, [sp, #184]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w8, [sp, #194]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #192]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #176]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q3, q2, [x1]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #304
; NONEON-NOSVE-NEXT: ret
;
; Test input: load a <32 x i32> vector from %in, truncate every lane to i16,
; double the narrowed vector and store the <32 x i16> result to %out.
; The self-add exists only to stop the truncate from being merged into the
; store, so the truncate lowering itself is what gets exercised.
; The expected-output lines above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand.
%a = load <32 x i32>, ptr %in
%b = trunc <32 x i32> %a to <32 x i16>
%c = add <32 x i16> %b, %b
store <32 x i16> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT: ldp q3, q18, [x0, #224]
; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
; CHECK-NEXT: ldp q18, q22, [x0, #160]
; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z24.h, z19.h, z19.h
; CHECK-NEXT: ldp q3, q19, [x0, #96]
; CHECK-NEXT: uzp1 z23.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z26.h, z22.h, z22.h
; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z17.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z25.h, z18.h, z18.h
; CHECK-NEXT: splice z7.h, p0, { z20.h, z21.h }
; CHECK-NEXT: uzp1 z21.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: uzp1 z20.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z16.h, z6.h, z6.h
; CHECK-NEXT: splice z6.h, p0, { z23.h, z24.h }
; CHECK-NEXT: uzp1 z18.h, z3.h, z3.h
; CHECK-NEXT: splice z3.h, p0, { z25.h, z26.h }
; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
; CHECK-NEXT: add z0.h, z2.h, z2.h
; CHECK-NEXT: add z7.h, z7.h, z7.h
; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h }
; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h }
; CHECK-NEXT: splice z16.h, p0, { z20.h, z21.h }
; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
; CHECK-NEXT: add z6.h, z6.h, z6.h
; CHECK-NEXT: add z3.h, z3.h, z3.h
; CHECK-NEXT: stp q0, q7, [x1, #96]
; CHECK-NEXT: add z0.h, z1.h, z1.h
; CHECK-NEXT: add z1.h, z2.h, z2.h
; CHECK-NEXT: add z2.h, z16.h, z16.h
; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.h, z4.h, z4.h
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: sub sp, sp, #528
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: mov x5, x1
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192]
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224]
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96]
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160]
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128]
; NONEON-NOSVE-NEXT: str q0, [sp, #320]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #332]
; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #160]
; NONEON-NOSVE-NEXT: ldr w10, [sp, #320]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #192]
; NONEON-NOSVE-NEXT: ldr w23, [sp, #328]
; NONEON-NOSVE-NEXT: add w21, w8, w8
; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #240]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #160]
; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #368]
; NONEON-NOSVE-NEXT: str q19, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w29, [sp, #380]
; NONEON-NOSVE-NEXT: ldr w30, [sp, #376]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #168]
; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #288]
; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #336]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #300]
; NONEON-NOSVE-NEXT: ldr w4, [sp, #296]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w11, [sp, #360]
; NONEON-NOSVE-NEXT: ldr w12, [sp, #356]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w13, [sp, #352]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #348]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #344]
; NONEON-NOSVE-NEXT: str q3, [sp, #144]
; NONEON-NOSVE-NEXT: ldr w16, [sp, #340]
; NONEON-NOSVE-NEXT: ldr w17, [sp, #336]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w6, [sp, #292]
; NONEON-NOSVE-NEXT: ldr w7, [sp, #288]
; NONEON-NOSVE-NEXT: str q5, [sp, #272]
; NONEON-NOSVE-NEXT: ldr w25, [sp, #316]
; NONEON-NOSVE-NEXT: ldr w26, [sp, #312]
; NONEON-NOSVE-NEXT: ldr w19, [sp, #284]
; NONEON-NOSVE-NEXT: ldr w20, [sp, #280]
; NONEON-NOSVE-NEXT: ldr w22, [sp, #276]
; NONEON-NOSVE-NEXT: ldr w24, [sp, #272]
; NONEON-NOSVE-NEXT: ldr w27, [sp, #308]
; NONEON-NOSVE-NEXT: ldr w28, [sp, #304]
; NONEON-NOSVE-NEXT: strh w21, [sp, #494]
; NONEON-NOSVE-NEXT: add w21, w23, w23
; NONEON-NOSVE-NEXT: strh w21, [sp, #492]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #216]
; NONEON-NOSVE-NEXT: ldp w0, w18, [sp, #152]
; NONEON-NOSVE-NEXT: ldp w2, w1, [sp, #144]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #176]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #184]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #192]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #200]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #384]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #388]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #392]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #396]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #260]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #264]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #268]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #224]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #232]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #240]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #248]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w9, [sp, #368]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #372]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w8, [sp, #324]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #364]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #490]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strh w8, [sp, #488]
; NONEON-NOSVE-NEXT: add w8, w9, w9
; NONEON-NOSVE-NEXT: strh w8, [sp, #486]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strh w8, [sp, #484]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strh w8, [sp, #482]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strh w8, [sp, #480]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strh w8, [sp, #478]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strh w8, [sp, #476]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: strh w8, [sp, #474]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strh w8, [sp, #472]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: strh w8, [sp, #470]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strh w8, [sp, #468]
; NONEON-NOSVE-NEXT: add w8, w1, w1
; NONEON-NOSVE-NEXT: strh w8, [sp, #466]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: strh w8, [sp, #464]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strh w8, [sp, #462]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: strh w8, [sp, #460]
; NONEON-NOSVE-NEXT: add w8, w6, w6
; NONEON-NOSVE-NEXT: strh w8, [sp, #458]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strh w8, [sp, #456]
; NONEON-NOSVE-NEXT: add w8, w19, w19
; NONEON-NOSVE-NEXT: strh w8, [sp, #454]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: strh w8, [sp, #452]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: strh w8, [sp, #450]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strh w8, [sp, #448]
; NONEON-NOSVE-NEXT: add w8, w25, w25
; NONEON-NOSVE-NEXT: strh w8, [sp, #510]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strh w8, [sp, #508]
; NONEON-NOSVE-NEXT: add w8, w27, w27
; NONEON-NOSVE-NEXT: strh w8, [sp, #506]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strh w8, [sp, #504]
; NONEON-NOSVE-NEXT: add w8, w29, w29
; NONEON-NOSVE-NEXT: strh w8, [sp, #502]
; NONEON-NOSVE-NEXT: add w8, w30, w30
; NONEON-NOSVE-NEXT: strh w8, [sp, #500]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #464]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #498]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #496]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #446]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #444]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #442]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #440]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #438]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #436]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #434]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #432]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #432]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #526]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #524]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #522]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #520]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #518]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #516]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #514]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #512]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #496]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #414]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #412]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #410]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #408]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #406]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #404]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #402]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #400]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #430]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #428]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #426]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #424]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #422]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #420]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #418]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w8, [sp, #416]
; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #400]
; NONEON-NOSVE-NEXT: stp q1, q0, [x5]
; NONEON-NOSVE-NEXT: stp q4, q3, [x5, #32]
; NONEON-NOSVE-NEXT: stp q7, q6, [x5, #64]
; NONEON-NOSVE-NEXT: stp q2, q5, [x5, #96]
; NONEON-NOSVE-NEXT: add sp, sp, #528
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ret
%a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i16>
%c = add <64 x i16> %b, %b
store <64 x i16> %c, ptr %out
ret void
}
;
; truncate i64 -> i8
;
; NOTE: v4i8 is not legal so result i8 elements are held within i16 containers.
; Loads a <4 x i64> from %in, truncates every lane to i8 and returns the
; <4 x i8> result. The expected code for every run only narrows down to
; 16-bit lanes (last uzp1 works on .h elements; the scalar fallback uses strh)
; and hands the value back in d0.
define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp]
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: strh w10, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
; IR under test: plain load -> trunc -> return.
%a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i8>
ret <4 x i8> %b
}
; Loads a <8 x i64> from %in, truncates every lane to i8 and returns the
; <8 x i8> result in d0. The expected SVE/SME code narrows in three uzp1
; steps (.s, then .h, then .b), joining the partial vectors with splice.
; The run without SVE is expected to write each byte to a stack slot with
; strb and reload the packed result.
define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #80
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: str q1, [sp, #48]
; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #48]
; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #32]
; NONEON-NOSVE-NEXT: strb w10, [sp, #77]
; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: strb w11, [sp, #75]
; NONEON-NOSVE-NEXT: strb w8, [sp, #73]
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #72]
; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
; IR under test: plain load -> trunc -> return.
%a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i8>
ret <8 x i8> %b
}
; Loads a <16 x i64> from %in, truncates every lane to i8 and returns the
; <16 x i8> result in q0. The expected SVE/SME code is a three-level
; uzp1/splice reduction (.s, .h, .b lanes). The run without SVE is expected
; to copy the input to the stack, store the sixteen bytes one at a time with
; strb, and reload them as a single q register.
define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z19.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z1.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z18.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: splice z1.s, p0, { z18.s, z19.s }
; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #144
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #96]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q6, q7, [x0, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32]
; NONEON-NOSVE-NEXT: str q3, [sp, #80]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: stp q7, q5, [sp, #48]
; NONEON-NOSVE-NEXT: strb w8, [sp, #142]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #16]
; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #96]
; NONEON-NOSVE-NEXT: strb w9, [sp, #143]
; NONEON-NOSVE-NEXT: strb w8, [sp, #140]
; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #48]
; NONEON-NOSVE-NEXT: strb w10, [sp, #141]
; NONEON-NOSVE-NEXT: strb w8, [sp, #138]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #120]
; NONEON-NOSVE-NEXT: strb w11, [sp, #139]
; NONEON-NOSVE-NEXT: strb w8, [sp, #137]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #112]
; NONEON-NOSVE-NEXT: strb w8, [sp, #136]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #72]
; NONEON-NOSVE-NEXT: strb w8, [sp, #135]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #64]
; NONEON-NOSVE-NEXT: strb w8, [sp, #134]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #104]
; NONEON-NOSVE-NEXT: strb w8, [sp, #133]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #96]
; NONEON-NOSVE-NEXT: strb w8, [sp, #132]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #88]
; NONEON-NOSVE-NEXT: strb w8, [sp, #131]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #80]
; NONEON-NOSVE-NEXT: strb w8, [sp, #130]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #129]
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #128]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #128]
; NONEON-NOSVE-NEXT: add sp, sp, #144
; NONEON-NOSVE-NEXT: ret
; IR under test: plain load -> trunc -> return.
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i8>
ret <16 x i8> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <32 x i64> from %in, truncates every lane to i8, doubles the
; truncated vector and stores the <32 x i8> result to %out. The expected
; SVE/SME code is a three-level uzp1/splice reduction followed by two byte
; adds and a single stp. The run without SVE is expected to spill x19-x30,
; double each element in a w register, and build the result on the stack
; with strb before copying it to %out.
define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q5, q6, [x0, #224]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q7, [x0, #64]
; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s
; CHECK-NEXT: ldp q6, q18, [x0, #192]
; CHECK-NEXT: uzp1 z16.s, z5.s, z5.s
; CHECK-NEXT: ldp q5, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
; CHECK-NEXT: ldp q18, q22, [x0, #160]
; CHECK-NEXT: uzp1 z20.s, z6.s, z6.s
; CHECK-NEXT: ldp q6, q23, [x0, #96]
; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s }
; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s
; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s
; CHECK-NEXT: uzp1 z26.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s
; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s
; CHECK-NEXT: uzp1 z23.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s
; CHECK-NEXT: uzp1 z6.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z22.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z5.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z3.s, p0, { z20.s, z21.s }
; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, { z24.s, z25.s }
; CHECK-NEXT: splice z7.s, p0, { z26.s, z27.s }
; CHECK-NEXT: splice z4.s, p0, { z17.s, z18.s }
; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h
; CHECK-NEXT: splice z5.s, p0, { z5.s, z6.s }
; CHECK-NEXT: splice z6.s, p0, { z22.s, z23.s }
; CHECK-NEXT: splice z1.s, p0, { z1.s, z2.s }
; CHECK-NEXT: uzp1 z16.h, z3.h, z3.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z19.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z18.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
; CHECK-NEXT: splice z0.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h }
; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
; CHECK-NEXT: uzp1 z7.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z6.b, z3.b, z3.b
; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b }
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #416
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #336] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #352] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #368] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #384] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128]
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #400] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0]
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #320] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224]
; NONEON-NOSVE-NEXT: str x1, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192]
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #160]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #176]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #184]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #192]
; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w25, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w26, [sp, #216]
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: ldr w9, [sp, #192]
; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w16, [sp, #48]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldr w18, [sp, #96]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #128]
; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #72]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #128]
; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w23, [sp, #240]
; NONEON-NOSVE-NEXT: ldr w21, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #272]
; NONEON-NOSVE-NEXT: ldr w27, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w28, [sp, #264]
; NONEON-NOSVE-NEXT: strb w9, [sp, #298]
; NONEON-NOSVE-NEXT: ldr w24, [sp, #248]
; NONEON-NOSVE-NEXT: ldr w22, [sp, #232]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w8, [sp, #200]
; NONEON-NOSVE-NEXT: str q7, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #104]
; NONEON-NOSVE-NEXT: ldr w12, [sp, #112]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w9, [sp, #296]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: str q18, [sp, #80]
; NONEON-NOSVE-NEXT: ldr w19, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w20, [sp, #40]
; NONEON-NOSVE-NEXT: strb w8, [sp, #299]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: ldr w4, [sp, #80]
; NONEON-NOSVE-NEXT: strb w9, [sp, #294]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldr w7, [sp, #88]
; NONEON-NOSVE-NEXT: strb w8, [sp, #297]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: ldr w17, [sp, #56]
; NONEON-NOSVE-NEXT: strb w9, [sp, #292]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldr w10, [sp, #144]
; NONEON-NOSVE-NEXT: strb w8, [sp, #295]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: ldr w15, [sp, #136]
; NONEON-NOSVE-NEXT: strb w9, [sp, #290]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldr w13, [sp, #120]
; NONEON-NOSVE-NEXT: strb w8, [sp, #293]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: ldr w11, [sp, #152]
; NONEON-NOSVE-NEXT: strb w9, [sp, #288]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldr w1, [sp, #280]
; NONEON-NOSVE-NEXT: strb w8, [sp, #291]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: ldr w29, [sp, #160]
; NONEON-NOSVE-NEXT: strb w9, [sp, #318]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldr w30, [sp, #168]
; NONEON-NOSVE-NEXT: strb w8, [sp, #289]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strb w9, [sp, #316]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: strb w8, [sp, #319]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strb w9, [sp, #314]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strb w8, [sp, #317]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strb w9, [sp, #312]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strb w8, [sp, #315]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strb w9, [sp, #310]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: strb w8, [sp, #313]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strb w9, [sp, #308]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strb w8, [sp, #311]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strb w9, [sp, #306]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #309]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strb w5, [sp, #303]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: strb w6, [sp, #302]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: strb w8, [sp, #307]
; NONEON-NOSVE-NEXT: add w8, w1, w1
; NONEON-NOSVE-NEXT: strb w5, [sp, #301]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #400] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w6, [sp, #300]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #305]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w9, [sp, #304]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #288]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #336] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x8]
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #320] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #416
; NONEON-NOSVE-NEXT: ret
; IR under test: load -> trunc -> add -> store.
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i8>
; Doubling %b sits between the trunc and the store so the two are not
; combined into a single truncating store.
%c = add <32 x i8> %b, %b
store <32 x i8> %c, ptr %out
ret void
}
;
; truncate i64 -> i16
;
; Loads a <4 x i64> from %in, truncates every lane to i16 and returns the
; <4 x i16> result in d0. The expected SVE/SME code narrows with uzp1 on .s
; lanes, splices the two halves, then narrows once more on .h lanes. The run
; without SVE is expected to store the four halfwords to the stack with strh
; and reload them.
define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp]
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: strh w10, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
; IR under test: plain load -> trunc -> return.
%a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i16>
ret <4 x i16> %b
}
; Loads a <8 x i64> from %in, truncates every lane to i16 and returns the
; <8 x i16> result in q0. The expected SVE/SME code is a two-level
; uzp1/splice reduction (.s lanes, then .h lanes). The run without SVE is
; expected to store the eight halfwords to the stack with strh and reload
; them as one q register.
define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #80
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: str q1, [sp, #48]
; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #48]
; NONEON-NOSVE-NEXT: strh w9, [sp, #78]
; NONEON-NOSVE-NEXT: strh w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #32]
; NONEON-NOSVE-NEXT: strh w10, [sp, #74]
; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: strh w11, [sp, #70]
; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
; IR under test: plain load -> trunc -> return.
%a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i16>
ret <8 x i16> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <16 x i64> from %in, truncates every lane to i16, doubles the
; truncated vector and stores the <16 x i16> result to %out. The expected
; SVE/SME code is a two-level uzp1/splice reduction followed by two halfword
; adds and a single stp. The run without SVE is expected to double each
; element in a w register and assemble the result on the stack with strh
; before copying it to %out.
define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z19.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z0.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z18.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: splice z5.s, p0, { z18.s, z19.s }
; CHECK-NEXT: splice z1.s, p0, { z2.s, z3.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h
; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z1.h, p0, { z6.h, z7.h }
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i64_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #160
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #64]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #72]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #104]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldr w4, [sp, #80]
; NONEON-NOSVE-NEXT: ldr w5, [sp, #88]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w18, [sp]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #8]
; NONEON-NOSVE-NEXT: strh w9, [sp, #142]
; NONEON-NOSVE-NEXT: add w9, w3, w3
; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: ldr w16, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w17, [sp, #40]
; NONEON-NOSVE-NEXT: strh w9, [sp, #138]
; NONEON-NOSVE-NEXT: add w9, w5, w5
; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: ldr w14, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #24]
; NONEON-NOSVE-NEXT: strh w9, [sp, #134]
; NONEON-NOSVE-NEXT: add w9, w0, w0
; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: ldr w12, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w13, [sp, #56]
; NONEON-NOSVE-NEXT: strh w9, [sp, #130]
; NONEON-NOSVE-NEXT: add w9, w17, w17
; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: ldr w10, [sp, #112]
; NONEON-NOSVE-NEXT: ldr w11, [sp, #120]
; NONEON-NOSVE-NEXT: strh w9, [sp, #158]
; NONEON-NOSVE-NEXT: add w9, w15, w15
; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strh w9, [sp, #154]
; NONEON-NOSVE-NEXT: add w9, w13, w13
; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strh w9, [sp, #150]
; NONEON-NOSVE-NEXT: add w9, w11, w11
; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strh w9, [sp, #146]
; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
; IR under test: load -> trunc -> add -> store.
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i16>
; Doubling %b sits between the trunc and the store so the two are not
; combined into a single truncating store.
%c = add <16 x i16> %b, %b
store <16 x i16> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #160]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q4, q5, [x0, #96]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
; CHECK-NEXT: ldp q3, q18, [x0, #128]
; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s
; CHECK-NEXT: ldp q2, q19, [x0, #192]
; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
; CHECK-NEXT: ldp q18, q22, [x0, #224]
; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s
; CHECK-NEXT: ldp q3, q23, [x0, #32]
; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s }
; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s
; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s
; CHECK-NEXT: uzp1 z26.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s
; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s
; CHECK-NEXT: uzp1 z23.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z22.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s
; CHECK-NEXT: splice z1.s, p0, { z20.s, z21.s }
; CHECK-NEXT: splice z6.s, p0, { z24.s, z25.s }
; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, { z26.s, z27.s }
; CHECK-NEXT: splice z7.s, p0, { z17.s, z18.s }
; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h
; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
; CHECK-NEXT: splice z3.s, p0, { z22.s, z23.s }
; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s }
; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
; CHECK-NEXT: splice z7.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
; CHECK-NEXT: splice z4.h, p0, { z5.h, z6.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: add z2.h, z7.h, z7.h
; CHECK-NEXT: add z3.h, z4.h, z4.h
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: stp q2, q3, [x1, #32]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #432
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #352] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #368] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #384] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #400] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32]
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #416] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0]
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #336] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224]
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192]
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #144]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #160]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #168]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #176]
; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w25, [sp, #192]
; NONEON-NOSVE-NEXT: ldr w26, [sp, #200]
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: ldr w9, [sp, #176]
; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #80]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #56]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldr w18, [sp, #80]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #112]
; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #88]
; NONEON-NOSVE-NEXT: ldr w16, [sp, #32]
; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #240]
; NONEON-NOSVE-NEXT: ldr w23, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w24, [sp, #232]
; NONEON-NOSVE-NEXT: ldr w10, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #264]
; NONEON-NOSVE-NEXT: ldr w27, [sp, #240]
; NONEON-NOSVE-NEXT: ldr w28, [sp, #248]
; NONEON-NOSVE-NEXT: strh w9, [sp, #308]
; NONEON-NOSVE-NEXT: ldr w21, [sp, #208]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w8, [sp, #184]
; NONEON-NOSVE-NEXT: str q7, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w22, [sp, #216]
; NONEON-NOSVE-NEXT: ldr w17, [sp, #40]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w9, [sp, #304]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: strh w8, [sp, #310]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: ldr w19, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #306]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: ldr w20, [sp, #24]
; NONEON-NOSVE-NEXT: str q18, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #112]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #120]
; NONEON-NOSVE-NEXT: strh w8, [sp, #302]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: ldr w4, [sp, #64]
; NONEON-NOSVE-NEXT: strh w9, [sp, #300]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldr w7, [sp, #72]
; NONEON-NOSVE-NEXT: strh w8, [sp, #298]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: ldr w12, [sp, #96]
; NONEON-NOSVE-NEXT: strh w9, [sp, #296]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldr w13, [sp, #104]
; NONEON-NOSVE-NEXT: strh w8, [sp, #294]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: ldr w10, [sp, #128]
; NONEON-NOSVE-NEXT: strh w9, [sp, #292]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldr w11, [sp, #136]
; NONEON-NOSVE-NEXT: strh w8, [sp, #290]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: ldr w29, [sp, #144]
; NONEON-NOSVE-NEXT: strh w9, [sp, #288]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldr w30, [sp, #152]
; NONEON-NOSVE-NEXT: strh w8, [sp, #286]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strh w9, [sp, #284]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: strh w8, [sp, #282]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strh w9, [sp, #280]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: strh w8, [sp, #278]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strh w9, [sp, #276]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strh w8, [sp, #274]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strh w9, [sp, #272]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strh w8, [sp, #334]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strh w9, [sp, #332]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: strh w8, [sp, #330]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strh w9, [sp, #328]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strh w8, [sp, #326]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #324]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w5, [sp, #318]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: strh w6, [sp, #316]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strh w5, [sp, #314]
; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #272]
; NONEON-NOSVE-NEXT: strh w6, [sp, #312]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #416] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w8, [sp, #322]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #400] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #320]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #304]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q3, q2, [x1]
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #336] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #432
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i16>
%c = add <32 x i16> %b, %b
store <32 x i16> %c, ptr %out
ret void
}
;
; truncate i64 -> i32
;
; Loads a <4 x i64> from %in, truncates it to <4 x i32> and returns the result.
; The +sve2 and +sme runs (default CHECK prefix) expect one uzp1 on .s elements
; per 128-bit input half, joined by a single splice under a vl2 predicate.
; The run without -mattr (NONEON-NOSVE prefix) expects the input to be written
; to the stack and narrowed with 64-bit ldp loads followed by 32-bit stp stores.
define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp]
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i32>
ret <4 x i32> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <8 x i64> from %in, truncates it to <8 x i32>, adds the truncated
; vector to itself and stores the sum to %out.
; The +sve2 and +sme runs (default CHECK prefix) expect four uzp1 on .s
; elements, two splices under a vl2 predicate and two vector adds before the
; final stp. The run without -mattr (NONEON-NOSVE prefix) expects the input to
; be copied into a 96-byte stack area and processed one element at a time with
; 32-bit loads at 8-byte strides, scalar adds and paired 32-bit stores.
define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: add z1.s, z2.s, z2.s
; CHECK-NEXT: add z0.s, z0.s, z0.s
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp]
; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w12, [sp]
; NONEON-NOSVE-NEXT: ldr w13, [sp, #8]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #40]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #24]
; NONEON-NOSVE-NEXT: ldr w10, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w11, [sp, #56]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72]
; NONEON-NOSVE-NEXT: add w9, w13, w13
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT: add w9, w15, w15
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88]
; NONEON-NOSVE-NEXT: add w9, w11, w11
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i32>
%c = add <8 x i32> %b, %b
store <8 x i32> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <16 x i64> from %in, truncates it to <16 x i32>, adds the truncated
; vector to itself and stores the sum to %out.
; The +sve2 and +sme runs (default CHECK prefix) expect eight uzp1 on .s
; elements, four splices under a vl2 predicate and four vector adds before the
; two stp stores. The run without -mattr (NONEON-NOSVE prefix) expects the
; input to be copied into a 192-byte stack area and processed one element at a
; time with 32-bit loads at 8-byte strides, scalar adds and paired 32-bit
; stores.
define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #96]
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z7.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z6.s, z1.s, z1.s
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z2.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, { z6.s, z7.s }
; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s }
; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
; CHECK-NEXT: splice z3.s, p0, { z4.s, z5.s }
; CHECK-NEXT: add z0.s, z0.s, z0.s
; CHECK-NEXT: add z1.s, z1.s, z1.s
; CHECK-NEXT: add z2.s, z2.s, z2.s
; CHECK-NEXT: add z3.s, z3.s, z3.s
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i64_v16i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #192
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #64]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #72]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #104]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldr w4, [sp, #80]
; NONEON-NOSVE-NEXT: ldr w5, [sp, #88]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w18, [sp]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #8]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168]
; NONEON-NOSVE-NEXT: add w9, w3, w3
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: ldr w16, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w17, [sp, #40]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160]
; NONEON-NOSVE-NEXT: add w9, w5, w5
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: ldr w14, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152]
; NONEON-NOSVE-NEXT: add w9, w0, w0
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: ldr w12, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w13, [sp, #56]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144]
; NONEON-NOSVE-NEXT: add w9, w17, w17
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: ldr w10, [sp, #112]
; NONEON-NOSVE-NEXT: ldr w11, [sp, #120]
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136]
; NONEON-NOSVE-NEXT: add w9, w15, w15
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128]
; NONEON-NOSVE-NEXT: add w9, w13, w13
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #184]
; NONEON-NOSVE-NEXT: add w9, w11, w11
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #176]
; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #128]
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #160]
; NONEON-NOSVE-NEXT: stp q3, q2, [x1]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #192
; NONEON-NOSVE-NEXT: ret
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i32>
%c = add <16 x i32> %b, %b
store <16 x i32> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <32 x i64> from %in, truncates it to <32 x i32>, adds the truncated
; vector to itself and stores the sum to %out.
; The +sve2 and +sme runs (default CHECK prefix) expect sixteen uzp1 on .s
; elements, eight splices under a vl2 predicate and eight vector adds
; interleaved with the four stp stores. The run without -mattr (NONEON-NOSVE
; prefix) expects a 496-byte stack frame in which x19-x30 are saved and
; restored, two 32-bit values are spilled and reloaded at [sp, #8], and each
; element is processed with a 32-bit load, a scalar add and a 32-bit store.
define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
; CHECK-NEXT: ldp q3, q18, [x0, #224]
; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s
; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
; CHECK-NEXT: ldp q18, q22, [x0, #160]
; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z24.s, z19.s, z19.s
; CHECK-NEXT: ldp q3, q19, [x0, #96]
; CHECK-NEXT: uzp1 z23.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z26.s, z22.s, z22.s
; CHECK-NEXT: splice z2.s, p0, { z16.s, z17.s }
; CHECK-NEXT: uzp1 z17.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z25.s, z18.s, z18.s
; CHECK-NEXT: splice z7.s, p0, { z20.s, z21.s }
; CHECK-NEXT: uzp1 z21.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
; CHECK-NEXT: uzp1 z20.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z16.s, z6.s, z6.s
; CHECK-NEXT: splice z6.s, p0, { z23.s, z24.s }
; CHECK-NEXT: uzp1 z18.s, z3.s, z3.s
; CHECK-NEXT: splice z3.s, p0, { z25.s, z26.s }
; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s
; CHECK-NEXT: add z0.s, z2.s, z2.s
; CHECK-NEXT: add z7.s, z7.s, z7.s
; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s }
; CHECK-NEXT: splice z2.s, p0, { z18.s, z19.s }
; CHECK-NEXT: splice z16.s, p0, { z20.s, z21.s }
; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s }
; CHECK-NEXT: add z6.s, z6.s, z6.s
; CHECK-NEXT: add z3.s, z3.s, z3.s
; CHECK-NEXT: stp q0, q7, [x1, #96]
; CHECK-NEXT: add z0.s, z1.s, z1.s
; CHECK-NEXT: add z1.s, z2.s, z2.s
; CHECK-NEXT: add z2.s, z16.s, z16.s
; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.s, z4.s, z4.s
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #496
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #416] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #432] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #448] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #464] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #480] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96]
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #400] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160]
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128]
; NONEON-NOSVE-NEXT: str q0, [sp, #192]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #192]
; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #200]
; NONEON-NOSVE-NEXT: ldr w10, [sp, #32]
; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #160]
; NONEON-NOSVE-NEXT: ldr w12, [sp, #48]
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: ldr w8, [sp, #40]
; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #112]
; NONEON-NOSVE-NEXT: ldr w25, [sp, #160]
; NONEON-NOSVE-NEXT: ldr w26, [sp, #168]
; NONEON-NOSVE-NEXT: str q5, [sp, #144]
; NONEON-NOSVE-NEXT: ldr w21, [sp, #176]
; NONEON-NOSVE-NEXT: ldr w22, [sp, #184]
; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w23, [sp, #144]
; NONEON-NOSVE-NEXT: ldr w24, [sp, #152]
; NONEON-NOSVE-NEXT: str q3, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w4, [sp, #112]
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w8, [sp, #216]
; NONEON-NOSVE-NEXT: ldr w27, [sp, #16]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldr w28, [sp, #24]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #64]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: str w9, [sp, #344]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: str w8, [sp, #348]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: ldr w7, [sp, #120]
; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #240]
; NONEON-NOSVE-NEXT: ldr w18, [sp, #128]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #136]
; NONEON-NOSVE-NEXT: str w8, [sp, #340]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: ldr w19, [sp, #240]
; NONEON-NOSVE-NEXT: str w9, [sp, #336]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: ldr w20, [sp, #248]
; NONEON-NOSVE-NEXT: str w8, [sp, #332]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: ldr w16, [sp, #256]
; NONEON-NOSVE-NEXT: str w9, [sp, #328]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldr w17, [sp, #264]
; NONEON-NOSVE-NEXT: str q19, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #72]
; NONEON-NOSVE-NEXT: str w8, [sp, #324]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: ldr w2, [sp, #96]
; NONEON-NOSVE-NEXT: str w9, [sp, #320]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldr w3, [sp, #104]
; NONEON-NOSVE-NEXT: str w8, [sp, #380]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: ldr w13, [sp, #56]
; NONEON-NOSVE-NEXT: str w9, [sp, #376]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldr w10, [sp, #80]
; NONEON-NOSVE-NEXT: str w8, [sp, #372]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: ldr w11, [sp, #88]
; NONEON-NOSVE-NEXT: str w9, [sp, #368]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldr w29, [sp, #224]
; NONEON-NOSVE-NEXT: str w8, [sp, #316]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: ldr w30, [sp, #232]
; NONEON-NOSVE-NEXT: str w9, [sp, #312]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: str w8, [sp, #308]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: str w9, [sp, #304]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: str w8, [sp, #396]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: str w9, [sp, #392]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: str w8, [sp, #388]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: str w9, [sp, #384]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: str w8, [sp, #284]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: str w9, [sp, #280]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: str w8, [sp, #276]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: str w9, [sp, #272]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: str w8, [sp, #300]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: str w9, [sp, #296]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: str w5, [sp, #364]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: str w6, [sp, #360]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: str w5, [sp, #356]
; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #304]
; NONEON-NOSVE-NEXT: str w6, [sp, #352]
; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #368]
; NONEON-NOSVE-NEXT: str w8, [sp, #292]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #336]
; NONEON-NOSVE-NEXT: str w9, [sp, #288]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #480] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #272]
; NONEON-NOSVE-NEXT: stp q4, q3, [x1, #32]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #464] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q7, q6, [x1, #64]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #448] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #96]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #432] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #416] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #400] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #496
; NONEON-NOSVE-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i32>
%c = add <32 x i32> %b, %b
store <32 x i32> %c, ptr %out
ret void
}