From 56ebe64ce69adde8b10793de7aa571df00c75e08 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 6 Jun 2025 17:03:10 +0100 Subject: [PATCH] [AArch64] Enable aggressivelyPreferBuildVectorSources (#142729) This helps to remove some inefficient buildvector lowering by converting extract_vector_elt(buildvector) to the original source. --- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 + .../GlobalISel/combine-build-vector.mir | 8 +- .../GlobalISel/combine-extract-vec-elt.mir | 4 +- .../GlobalISel/combine-insert-vec-elt.mir | 2 +- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 612 ++- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 396 +- llvm/test/CodeGen/AArch64/fptrunc.ll | 28 +- llvm/test/CodeGen/AArch64/itofp.ll | 228 +- llvm/test/CodeGen/AArch64/sext.ll | 6 +- ...sve-streaming-mode-fixed-length-bitcast.ll | 19 +- ...e-streaming-mode-fixed-length-ext-loads.ll | 260 +- ...aming-mode-fixed-length-fp-extend-trunc.ll | 234 +- ...streaming-mode-fixed-length-int-extends.ll | 3988 ++++++----------- ...e-streaming-mode-fixed-length-int-to-fp.ll | 396 +- ...-streaming-mode-fixed-length-ld2-alloca.ll | 20 +- llvm/test/CodeGen/AArch64/zext-to-tbl.ll | 171 +- 16 files changed, 2370 insertions(+), 4006 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7b7f020f7c77..cc59e43e0622 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -449,6 +449,10 @@ public: /// Enable aggressive FMA fusion on targets that want it. bool enableAggressiveFMAFusion(EVT VT) const override; + bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { + return true; + } + /// Returns the size of the platform's va_list object. unsigned getVaListSizeInBits(const DataLayout &DL) const override; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir index 93f6051c3bd3..5189582d0b6a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir @@ -55,13 +55,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 - ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) - ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) - ; CHECK-NEXT: $x0 = COPY %extract(s64) - ; CHECK-NEXT: $x1 = COPY %extract2(s64) + ; CHECK-NEXT: $x0 = COPY %arg1(s64) + ; CHECK-NEXT: $x1 = COPY %arg2(s64) ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir index e2933690c7c5..e81447a1de4b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir @@ -175,10 +175,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 - ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) - ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: $x0 = COPY %arg1(s64) ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir index c000a8e635bc..86c0575961a1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir @@ -278,8 +278,8 @@ body: | ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>)) ; CHECK-NEXT: RET_ReallyLR %3:_(s8) = G_CONSTANT i8 127 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index a33b1ef569fc..04dfdedb4275 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -726,7 +726,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-LABEL: test_signed_v3f128_v3i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #128 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x30, x23, [sp, #80] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 128 @@ -734,13 +734,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NEXT: .cfi_offset w21, -24 ; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w23, -40 ; CHECK-SD-NEXT: .cfi_offset w30, -48 -; CHECK-SD-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill -; CHECK-SD-NEXT: mov v2.16b, v1.16b +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: stp q2, q1, [sp, #32] // 32-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __getf2 ; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -755,15 +755,15 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov w21, #2147483647 // =0x7fffffff +; CHECK-SD-NEXT: mov w22, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: csel w19, w21, w19, gt +; CHECK-SD-NEXT: csel w19, w22, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: csel w22, wzr, w19, ne +; CHECK-SD-NEXT: csel w21, wzr, w19, ne ; CHECK-SD-NEXT: bl __getf2 ; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov w19, w0 @@ -775,16 +775,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: csel w19, w21, w19, gt +; CHECK-SD-NEXT: csel w19, w22, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 -; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: csel w8, wzr, w19, ne -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: mov v0.s[1], w22 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: csel w23, wzr, w19, ne ; CHECK-SD-NEXT: bl __getf2 ; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov w19, w0 @@ -796,16 +793,17 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: csel w19, w21, w19, gt +; CHECK-SD-NEXT: csel w19, w22, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-SD-NEXT: csel w8, wzr, w19, ne ; CHECK-SD-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: mov v0.s[1], w23 +; CHECK-SD-NEXT: ldp x30, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[2], w21 ; CHECK-SD-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.s[2], w8 ; CHECK-SD-NEXT: add sp, sp, #128 ; CHECK-SD-NEXT: ret ; @@ -4890,63 +4888,61 @@ define <16 x i16> @test_signed_v16f16_v16i16(<16 x half> %f) { define <8 x i8> @test_signed_v8f64_v8i8(<8 x double> %f) { ; CHECK-SD-LABEL: test_signed_v8f64_v8i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov d4, v3.d[1] -; CHECK-SD-NEXT: fcvtzs w11, d3 -; CHECK-SD-NEXT: mov w9, #127 // =0x7f -; CHECK-SD-NEXT: mov d3, v1.d[1] -; CHECK-SD-NEXT: fcvtzs w13, d2 -; CHECK-SD-NEXT: fcvtzs w15, d1 -; CHECK-SD-NEXT: fcvtzs w17, d0 -; CHECK-SD-NEXT: fcvtzs w8, d4 -; CHECK-SD-NEXT: mov d4, v2.d[1] -; CHECK-SD-NEXT: mov d2, v0.d[1] -; CHECK-SD-NEXT: fcvtzs w14, d3 -; CHECK-SD-NEXT: cmp w8, #127 -; CHECK-SD-NEXT: fcvtzs w12, d4 -; CHECK-SD-NEXT: fcvtzs w16, d2 -; CHECK-SD-NEXT: csel w10, w8, w9, lt -; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80 +; CHECK-SD-NEXT: mov d4, v0.d[1] +; CHECK-SD-NEXT: fcvtzs w10, d0 +; CHECK-SD-NEXT: mov w8, #127 // =0x7f +; CHECK-SD-NEXT: fcvtzs w12, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] +; CHECK-SD-NEXT: fcvtzs w9, d4 +; CHECK-SD-NEXT: cmp w9, #127 +; CHECK-SD-NEXT: csel w11, w9, w8, lt +; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80 +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt ; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: csel w10, w10, w8, gt -; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: csel w11, w11, w9, lt -; CHECK-SD-NEXT: cmn w11, #128 -; CHECK-SD-NEXT: csel w11, w11, w8, gt +; CHECK-SD-NEXT: csel w10, w10, w9, gt ; CHECK-SD-NEXT: cmp w12, #127 -; CHECK-SD-NEXT: csel w12, w12, w9, lt -; CHECK-SD-NEXT: fmov s3, w11 -; CHECK-SD-NEXT: cmn w12, #128 -; CHECK-SD-NEXT: csel w12, w12, w8, gt -; CHECK-SD-NEXT: cmp w13, #127 -; CHECK-SD-NEXT: csel w13, w13, w9, lt -; CHECK-SD-NEXT: mov v3.s[1], w10 -; CHECK-SD-NEXT: cmn w13, #128 -; CHECK-SD-NEXT: csel w13, w13, w8, gt -; CHECK-SD-NEXT: cmp w14, #127 -; CHECK-SD-NEXT: csel w14, w14, w9, lt -; CHECK-SD-NEXT: fmov s2, w13 -; CHECK-SD-NEXT: cmn w14, #128 -; CHECK-SD-NEXT: csel w14, w14, w8, gt -; CHECK-SD-NEXT: cmp w15, #127 -; CHECK-SD-NEXT: csel w15, w15, w9, lt -; CHECK-SD-NEXT: mov v2.s[1], w12 -; CHECK-SD-NEXT: cmn w15, #128 -; CHECK-SD-NEXT: csel w15, w15, w8, gt -; CHECK-SD-NEXT: cmp w16, #127 -; CHECK-SD-NEXT: csel w11, w16, w9, lt -; CHECK-SD-NEXT: fmov s1, w15 +; CHECK-SD-NEXT: fmov s0, w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: mov v0.b[1], w11 +; CHECK-SD-NEXT: csel w11, w12, w8, lt ; CHECK-SD-NEXT: cmn w11, #128 -; CHECK-SD-NEXT: csel w10, w11, w8, gt -; CHECK-SD-NEXT: cmp w17, #127 -; CHECK-SD-NEXT: csel w9, w17, w9, lt -; CHECK-SD-NEXT: mov v1.s[1], w14 -; CHECK-SD-NEXT: cmn w9, #128 -; CHECK-SD-NEXT: csel w8, w9, w8, gt -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: adrp x8, .LCPI82_0 -; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI82_0] -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: mov v0.b[2], w11 +; CHECK-SD-NEXT: fcvtzs w11, d2 +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: mov v0.b[3], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[4], w11 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: fcvtzs w11, d3 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v0.b[5], w10 +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[6], w11 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w8, w10, w8, lt +; CHECK-SD-NEXT: cmn w8, #128 +; CHECK-SD-NEXT: csel w8, w8, w9, gt +; CHECK-SD-NEXT: mov v0.b[7], w8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_signed_v8f64_v8i8: @@ -4990,11 +4986,9 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-SD-NEXT: mov d16, v0.d[1] ; CHECK-SD-NEXT: fcvtzs w10, d0 ; CHECK-SD-NEXT: mov w8, #127 // =0x7f -; CHECK-SD-NEXT: mov d0, v1.d[1] -; CHECK-SD-NEXT: fcvtzs w13, d1 -; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: fcvtzs w12, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: fcvtzs w9, d16 -; CHECK-SD-NEXT: fcvtzs w12, d0 ; CHECK-SD-NEXT: cmp w9, #127 ; CHECK-SD-NEXT: csel w11, w9, w8, lt ; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80 @@ -5006,115 +5000,94 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-SD-NEXT: csel w10, w10, w9, gt ; CHECK-SD-NEXT: cmp w12, #127 ; CHECK-SD-NEXT: fmov s0, w10 -; CHECK-SD-NEXT: csel w10, w12, w8, lt -; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: csel w10, w10, w9, gt -; CHECK-SD-NEXT: cmp w13, #127 -; CHECK-SD-NEXT: csel w12, w13, w8, lt -; CHECK-SD-NEXT: mov v0.s[1], w11 -; CHECK-SD-NEXT: fcvtzs w11, d1 -; CHECK-SD-NEXT: cmn w12, #128 -; CHECK-SD-NEXT: csel w12, w12, w9, gt -; CHECK-SD-NEXT: fmov s1, w12 -; CHECK-SD-NEXT: fcvtzs w12, d2 -; CHECK-SD-NEXT: mov d2, v3.d[1] -; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: mov w13, v0.s[1] -; CHECK-SD-NEXT: mov v1.s[1], w10 -; CHECK-SD-NEXT: csel w10, w11, w8, lt -; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: fcvtzs w11, d2 -; CHECK-SD-NEXT: csel w10, w10, w9, gt -; CHECK-SD-NEXT: cmp w12, #127 -; CHECK-SD-NEXT: mov v0.b[1], w13 -; CHECK-SD-NEXT: csel w12, w12, w8, lt -; CHECK-SD-NEXT: cmn w12, #128 -; CHECK-SD-NEXT: mov w13, v1.s[1] -; CHECK-SD-NEXT: csel w12, w12, w9, gt -; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: fmov s2, w12 -; CHECK-SD-NEXT: fcvtzs w12, d3 -; CHECK-SD-NEXT: mov d3, v4.d[1] -; CHECK-SD-NEXT: mov v0.b[2], v1.b[0] -; CHECK-SD-NEXT: mov v2.s[1], w10 -; CHECK-SD-NEXT: csel w10, w11, w8, lt -; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: fcvtzs w11, d3 -; CHECK-SD-NEXT: csel w10, w10, w9, gt -; CHECK-SD-NEXT: cmp w12, #127 -; CHECK-SD-NEXT: mov v0.b[3], w13 -; CHECK-SD-NEXT: csel w12, w12, w8, lt -; CHECK-SD-NEXT: cmn w12, #128 -; CHECK-SD-NEXT: mov w13, v2.s[1] -; CHECK-SD-NEXT: csel w12, w12, w9, gt -; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: fmov s3, w12 -; CHECK-SD-NEXT: fcvtzs w12, d4 -; CHECK-SD-NEXT: mov v0.b[4], v2.b[0] -; CHECK-SD-NEXT: mov d4, v5.d[1] -; CHECK-SD-NEXT: mov v3.s[1], w10 -; CHECK-SD-NEXT: csel w10, w11, w8, lt -; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: mov v0.b[5], w13 -; CHECK-SD-NEXT: csel w10, w10, w9, gt -; CHECK-SD-NEXT: cmp w12, #127 -; CHECK-SD-NEXT: fcvtzs w11, d4 -; CHECK-SD-NEXT: csel w12, w12, w8, lt -; CHECK-SD-NEXT: cmn w12, #128 -; CHECK-SD-NEXT: mov w13, v3.s[1] -; CHECK-SD-NEXT: csel w12, w12, w9, gt -; CHECK-SD-NEXT: mov v0.b[6], v3.b[0] -; CHECK-SD-NEXT: fmov s4, w12 -; CHECK-SD-NEXT: fcvtzs w12, d5 -; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: mov d5, v6.d[1] -; CHECK-SD-NEXT: mov v4.s[1], w10 -; CHECK-SD-NEXT: csel w10, w11, w8, lt -; CHECK-SD-NEXT: mov v0.b[7], w13 -; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: csel w10, w10, w9, gt -; CHECK-SD-NEXT: cmp w12, #127 -; CHECK-SD-NEXT: fcvtzs w13, d5 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: mov v0.b[1], w11 ; CHECK-SD-NEXT: csel w11, w12, w8, lt ; CHECK-SD-NEXT: cmn w11, #128 -; CHECK-SD-NEXT: mov w12, v4.s[1] -; CHECK-SD-NEXT: mov v0.b[8], v4.b[0] ; CHECK-SD-NEXT: csel w11, w11, w9, gt -; CHECK-SD-NEXT: fmov s5, w11 -; CHECK-SD-NEXT: fcvtzs w11, d6 -; CHECK-SD-NEXT: cmp w13, #127 -; CHECK-SD-NEXT: mov d6, v7.d[1] -; CHECK-SD-NEXT: mov v0.b[9], w12 -; CHECK-SD-NEXT: mov v5.s[1], w10 -; CHECK-SD-NEXT: csel w10, w13, w8, lt +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: mov v0.b[2], w11 +; CHECK-SD-NEXT: fcvtzs w11, d2 ; CHECK-SD-NEXT: cmn w10, #128 ; CHECK-SD-NEXT: csel w10, w10, w9, gt ; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: fcvtzs w13, d6 +; CHECK-SD-NEXT: mov v0.b[3], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[4], w11 +; CHECK-SD-NEXT: fcvtzs w11, d3 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v0.b[5], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v4.d[1] +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[6], w11 +; CHECK-SD-NEXT: fcvtzs w11, d4 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: mov v0.b[7], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v5.d[1] +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[8], w11 +; CHECK-SD-NEXT: fcvtzs w11, d5 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v0.b[9], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v6.d[1] +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[10], w11 +; CHECK-SD-NEXT: fcvtzs w11, d6 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: mov v0.b[11], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v7.d[1] +; CHECK-SD-NEXT: cmn w11, #128 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.b[12], w11 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: fcvtzs w11, d7 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #128 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v0.b[13], w10 +; CHECK-SD-NEXT: cmp w11, #127 +; CHECK-SD-NEXT: fcvtzs w10, d1 ; CHECK-SD-NEXT: csel w11, w11, w8, lt ; CHECK-SD-NEXT: cmn w11, #128 -; CHECK-SD-NEXT: mov v0.b[10], v5.b[0] -; CHECK-SD-NEXT: mov w12, v5.s[1] ; CHECK-SD-NEXT: csel w11, w11, w9, gt -; CHECK-SD-NEXT: fmov s6, w11 -; CHECK-SD-NEXT: fcvtzs w11, d7 -; CHECK-SD-NEXT: cmp w13, #127 -; CHECK-SD-NEXT: mov v0.b[11], w12 -; CHECK-SD-NEXT: mov v6.s[1], w10 -; CHECK-SD-NEXT: csel w10, w13, w8, lt -; CHECK-SD-NEXT: cmn w10, #128 -; CHECK-SD-NEXT: csel w10, w10, w9, gt -; CHECK-SD-NEXT: cmp w11, #127 -; CHECK-SD-NEXT: csel w8, w11, w8, lt +; CHECK-SD-NEXT: mov v0.b[14], w11 +; CHECK-SD-NEXT: cmp w10, #127 +; CHECK-SD-NEXT: csel w8, w10, w8, lt ; CHECK-SD-NEXT: cmn w8, #128 -; CHECK-SD-NEXT: mov v0.b[12], v6.b[0] -; CHECK-SD-NEXT: mov w11, v6.s[1] ; CHECK-SD-NEXT: csel w8, w8, w9, gt -; CHECK-SD-NEXT: fmov s7, w8 -; CHECK-SD-NEXT: mov v0.b[13], w11 -; CHECK-SD-NEXT: mov v7.s[1], w10 -; CHECK-SD-NEXT: mov v0.b[14], v7.b[0] -; CHECK-SD-NEXT: mov w8, v7.s[1] ; CHECK-SD-NEXT: mov v0.b[15], w8 ; CHECK-SD-NEXT: ret ; @@ -5179,63 +5152,60 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) { ; CHECK-SD-LABEL: test_signed_v8f64_v8i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov d4, v3.d[1] +; CHECK-SD-NEXT: mov d4, v0.d[1] ; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff -; CHECK-SD-NEXT: fcvtzs w11, d3 -; CHECK-SD-NEXT: mov d3, v1.d[1] -; CHECK-SD-NEXT: fcvtzs w13, d2 -; CHECK-SD-NEXT: fcvtzs w15, d1 -; CHECK-SD-NEXT: fcvtzs w17, d0 +; CHECK-SD-NEXT: fcvtzs w10, d0 +; CHECK-SD-NEXT: fcvtzs w12, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: fcvtzs w9, d4 -; CHECK-SD-NEXT: mov d4, v2.d[1] -; CHECK-SD-NEXT: mov d2, v0.d[1] -; CHECK-SD-NEXT: fcvtzs w14, d3 ; CHECK-SD-NEXT: cmp w9, w8 -; CHECK-SD-NEXT: fcvtzs w12, d4 -; CHECK-SD-NEXT: fcvtzs w16, d2 -; CHECK-SD-NEXT: csel w10, w9, w8, lt +; CHECK-SD-NEXT: csel w11, w9, w8, lt ; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000 +; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: cmp w12, w8 +; CHECK-SD-NEXT: fmov s0, w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: mov v0.h[1], w11 +; CHECK-SD-NEXT: csel w11, w12, w8, lt +; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: mov v0.h[2], w11 +; CHECK-SD-NEXT: fcvtzs w11, d2 ; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-SD-NEXT: csel w10, w10, w9, gt ; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: mov v0.h[3], w10 +; CHECK-SD-NEXT: fcvtzs w10, d1 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v0.h[4], w11 +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: fcvtzs w11, d3 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v0.h[5], w10 +; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: fcvtzs w10, d1 ; CHECK-SD-NEXT: csel w11, w11, w8, lt ; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-SD-NEXT: csel w11, w11, w9, gt -; CHECK-SD-NEXT: cmp w12, w8 -; CHECK-SD-NEXT: csel w12, w12, w8, lt -; CHECK-SD-NEXT: fmov s3, w11 -; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w12, w12, w9, gt -; CHECK-SD-NEXT: cmp w13, w8 -; CHECK-SD-NEXT: csel w13, w13, w8, lt -; CHECK-SD-NEXT: mov v3.s[1], w10 -; CHECK-SD-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w13, w13, w9, gt -; CHECK-SD-NEXT: cmp w14, w8 -; CHECK-SD-NEXT: csel w14, w14, w8, lt -; CHECK-SD-NEXT: fmov s2, w13 -; CHECK-SD-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w14, w14, w9, gt -; CHECK-SD-NEXT: cmp w15, w8 -; CHECK-SD-NEXT: csel w15, w15, w8, lt -; CHECK-SD-NEXT: mov v2.s[1], w12 -; CHECK-SD-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w15, w15, w9, gt -; CHECK-SD-NEXT: cmp w16, w8 -; CHECK-SD-NEXT: csel w11, w16, w8, lt -; CHECK-SD-NEXT: fmov s1, w15 -; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w10, w11, w9, gt -; CHECK-SD-NEXT: cmp w17, w8 -; CHECK-SD-NEXT: csel w8, w17, w8, lt -; CHECK-SD-NEXT: mov v1.s[1], w14 +; CHECK-SD-NEXT: mov v0.h[6], w11 +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: csel w8, w10, w8, lt ; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-SD-NEXT: csel w8, w8, w9, gt -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: adrp x8, .LCPI84_0 -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI84_0] -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-SD-NEXT: mov v0.h[7], w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_signed_v8f64_v8i16: @@ -5275,116 +5245,112 @@ define <8 x i16> @test_signed_v8f64_v8i16(<8 x double> %f) { define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-SD-LABEL: test_signed_v16f64_v16i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov d16, v3.d[1] -; CHECK-SD-NEXT: mov w9, #32767 // =0x7fff -; CHECK-SD-NEXT: fcvtzs w11, d3 -; CHECK-SD-NEXT: mov d3, v1.d[1] -; CHECK-SD-NEXT: fcvtzs w14, d2 -; CHECK-SD-NEXT: fcvtzs w15, d1 -; CHECK-SD-NEXT: mov d1, v7.d[1] -; CHECK-SD-NEXT: fcvtzs w18, d0 -; CHECK-SD-NEXT: fcvtzs w1, d7 -; CHECK-SD-NEXT: fcvtzs w2, d6 -; CHECK-SD-NEXT: fcvtzs w4, d5 -; CHECK-SD-NEXT: fcvtzs w6, d4 -; CHECK-SD-NEXT: fcvtzs w8, d16 -; CHECK-SD-NEXT: mov d16, v2.d[1] -; CHECK-SD-NEXT: mov d2, v0.d[1] -; CHECK-SD-NEXT: mov d0, v6.d[1] +; CHECK-SD-NEXT: mov d16, v0.d[1] +; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff +; CHECK-SD-NEXT: fcvtzs w11, d0 +; CHECK-SD-NEXT: mov d0, v1.d[1] +; CHECK-SD-NEXT: fcvtzs w12, d1 +; CHECK-SD-NEXT: fcvtzs w15, d3 +; CHECK-SD-NEXT: mov d1, v4.d[1] +; CHECK-SD-NEXT: fcvtzs w1, d5 +; CHECK-SD-NEXT: fcvtzs w9, d16 +; CHECK-SD-NEXT: fcvtzs w14, d0 +; CHECK-SD-NEXT: mov d0, v2.d[1] ; CHECK-SD-NEXT: fcvtzs w0, d1 -; CHECK-SD-NEXT: cmp w8, w9 -; CHECK-SD-NEXT: fcvtzs w13, d16 -; CHECK-SD-NEXT: fcvtzs w17, d2 -; CHECK-SD-NEXT: csel w10, w8, w9, lt -; CHECK-SD-NEXT: mov w8, #-32768 // =0xffff8000 +; CHECK-SD-NEXT: cmp w9, w8 +; CHECK-SD-NEXT: csel w10, w9, w8, lt +; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000 ; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w10, w10, w8, gt -; CHECK-SD-NEXT: cmp w11, w9 -; CHECK-SD-NEXT: csel w11, w11, w9, lt +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: csel w11, w11, w8, lt ; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w12, w11, w8, gt -; CHECK-SD-NEXT: cmp w13, w9 -; CHECK-SD-NEXT: csel w11, w13, w9, lt -; CHECK-SD-NEXT: fcvtzs w13, d3 +; CHECK-SD-NEXT: csel w13, w11, w9, gt +; CHECK-SD-NEXT: cmp w12, w8 +; CHECK-SD-NEXT: csel w11, w12, w8, lt +; CHECK-SD-NEXT: fcvtzs w12, d2 +; CHECK-SD-NEXT: mov d2, v5.d[1] ; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w11, w11, w8, gt -; CHECK-SD-NEXT: cmp w14, w9 -; CHECK-SD-NEXT: csel w14, w14, w9, lt -; CHECK-SD-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w14, w14, w8, gt -; CHECK-SD-NEXT: cmp w13, w9 -; CHECK-SD-NEXT: csel w13, w13, w9, lt -; CHECK-SD-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w13, w13, w8, gt -; CHECK-SD-NEXT: cmp w15, w9 -; CHECK-SD-NEXT: csel w15, w15, w9, lt +; CHECK-SD-NEXT: csel w17, w11, w9, gt +; CHECK-SD-NEXT: cmp w14, w8 +; CHECK-SD-NEXT: csel w11, w14, w8, lt +; CHECK-SD-NEXT: fcvtzs w14, d0 +; CHECK-SD-NEXT: mov d0, v3.d[1] +; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: cmp w12, w8 +; CHECK-SD-NEXT: csel w12, w12, w8, lt +; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: fcvtzs w18, d0 +; CHECK-SD-NEXT: fmov s0, w13 +; CHECK-SD-NEXT: csel w16, w12, w9, gt +; CHECK-SD-NEXT: cmp w14, w8 +; CHECK-SD-NEXT: csel w12, w14, w8, lt +; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: mov v0.h[1], w10 +; CHECK-SD-NEXT: fcvtzs w10, d2 +; CHECK-SD-NEXT: csel w14, w12, w9, gt +; CHECK-SD-NEXT: cmp w15, w8 +; CHECK-SD-NEXT: mov d2, v6.d[1] +; CHECK-SD-NEXT: csel w12, w15, w8, lt +; CHECK-SD-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w12, w12, w9, gt +; CHECK-SD-NEXT: cmp w18, w8 +; CHECK-SD-NEXT: mov v0.h[2], w17 +; CHECK-SD-NEXT: csel w15, w18, w8, lt +; CHECK-SD-NEXT: fcvtzs w18, d4 ; CHECK-SD-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w16, w15, w8, gt -; CHECK-SD-NEXT: cmp w17, w9 -; CHECK-SD-NEXT: csel w15, w17, w9, lt -; CHECK-SD-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w15, w15, w8, gt -; CHECK-SD-NEXT: cmp w18, w9 -; CHECK-SD-NEXT: csel w17, w18, w9, lt -; CHECK-SD-NEXT: cmn w17, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w17, w17, w8, gt -; CHECK-SD-NEXT: cmp w0, w9 -; CHECK-SD-NEXT: csel w18, w0, w9, lt -; CHECK-SD-NEXT: fcvtzs w0, d0 -; CHECK-SD-NEXT: mov d0, v5.d[1] -; CHECK-SD-NEXT: cmn w18, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w18, w18, w8, gt -; CHECK-SD-NEXT: cmp w1, w9 -; CHECK-SD-NEXT: csel w1, w1, w9, lt -; CHECK-SD-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: fcvtzs w3, d0 -; CHECK-SD-NEXT: mov d0, v4.d[1] -; CHECK-SD-NEXT: csel w1, w1, w8, gt -; CHECK-SD-NEXT: cmp w0, w9 -; CHECK-SD-NEXT: csel w0, w0, w9, lt -; CHECK-SD-NEXT: fmov s7, w1 +; CHECK-SD-NEXT: csel w15, w15, w9, gt +; CHECK-SD-NEXT: cmp w0, w8 +; CHECK-SD-NEXT: csel w0, w0, w8, lt +; CHECK-SD-NEXT: mov v0.h[3], w11 ; CHECK-SD-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w0, w0, w8, gt -; CHECK-SD-NEXT: cmp w2, w9 -; CHECK-SD-NEXT: fcvtzs w5, d0 -; CHECK-SD-NEXT: csel w2, w2, w9, lt -; CHECK-SD-NEXT: fmov s3, w12 -; CHECK-SD-NEXT: mov v7.s[1], w18 -; CHECK-SD-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w2, w2, w8, gt -; CHECK-SD-NEXT: cmp w3, w9 -; CHECK-SD-NEXT: csel w3, w3, w9, lt -; CHECK-SD-NEXT: mov v3.s[1], w10 -; CHECK-SD-NEXT: fmov s6, w2 -; CHECK-SD-NEXT: cmn w3, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: fmov s2, w14 -; CHECK-SD-NEXT: csel w3, w3, w8, gt -; CHECK-SD-NEXT: cmp w4, w9 -; CHECK-SD-NEXT: csel w4, w4, w9, lt -; CHECK-SD-NEXT: mov v6.s[1], w0 -; CHECK-SD-NEXT: cmn w4, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: mov v2.s[1], w11 -; CHECK-SD-NEXT: csel w12, w4, w8, gt -; CHECK-SD-NEXT: cmp w5, w9 -; CHECK-SD-NEXT: fmov s1, w16 -; CHECK-SD-NEXT: csel w10, w5, w9, lt -; CHECK-SD-NEXT: fmov s5, w12 +; CHECK-SD-NEXT: csel w0, w0, w9, gt +; CHECK-SD-NEXT: cmp w18, w8 +; CHECK-SD-NEXT: csel w18, w18, w8, lt +; CHECK-SD-NEXT: cmn w18, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: mov v0.h[4], w16 +; CHECK-SD-NEXT: csel w13, w18, w9, gt +; CHECK-SD-NEXT: cmp w1, w8 +; CHECK-SD-NEXT: fmov s1, w13 +; CHECK-SD-NEXT: csel w13, w1, w8, lt +; CHECK-SD-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w13, w13, w9, gt +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: mov v0.h[5], w14 +; CHECK-SD-NEXT: mov v1.h[1], w0 +; CHECK-SD-NEXT: csel w10, w10, w8, lt ; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w10, w10, w8, gt -; CHECK-SD-NEXT: cmp w6, w9 -; CHECK-SD-NEXT: mov v1.s[1], w13 -; CHECK-SD-NEXT: csel w9, w6, w9, lt -; CHECK-SD-NEXT: mov v5.s[1], w3 -; CHECK-SD-NEXT: fmov s0, w17 -; CHECK-SD-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-SD-NEXT: csel w8, w9, w8, gt -; CHECK-SD-NEXT: fmov s4, w8 -; CHECK-SD-NEXT: mov v0.s[1], w15 -; CHECK-SD-NEXT: adrp x8, .LCPI85_0 -; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI85_0] -; CHECK-SD-NEXT: mov v4.s[1], w10 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v0.h[6], w12 +; CHECK-SD-NEXT: mov v1.h[2], w13 +; CHECK-SD-NEXT: fcvtzs w13, d6 +; CHECK-SD-NEXT: mov v0.h[7], w15 +; CHECK-SD-NEXT: cmp w13, w8 +; CHECK-SD-NEXT: mov v1.h[3], w10 +; CHECK-SD-NEXT: fcvtzs w10, d2 +; CHECK-SD-NEXT: csel w11, w13, w8, lt +; CHECK-SD-NEXT: mov d2, v7.d[1] +; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v1.h[4], w11 +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: fcvtzs w11, d7 +; CHECK-SD-NEXT: csel w10, w10, w8, lt +; CHECK-SD-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w10, w10, w9, gt +; CHECK-SD-NEXT: mov v1.h[5], w10 +; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: fcvtzs w10, d2 +; CHECK-SD-NEXT: csel w11, w11, w8, lt +; CHECK-SD-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w11, w11, w9, gt +; CHECK-SD-NEXT: mov v1.h[6], w11 +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: csel w8, w10, w8, lt +; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w8, w8, w9, gt +; CHECK-SD-NEXT: mov v1.h[7], w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_signed_v16f64_v16i16: diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index b1b5154a57b4..099f43edfca6 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -645,18 +645,18 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-LABEL: test_unsigned_v3f128_v3i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x30, x21, [sp, #80] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 112 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 ; CHECK-SD-NEXT: .cfi_offset w30, -32 -; CHECK-SD-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill -; CHECK-SD-NEXT: mov v2.16b, v1.16b +; CHECK-SD-NEXT: stp q1, q0, [sp, #48] // 32-byte Folded Spill ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill ; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __getf2 ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -681,13 +681,10 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, wzr, w0, lt ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: csinv w8, w19, wzr, le -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: mov v0.s[1], w20 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: csinv w21, w19, wzr, le ; CHECK-SD-NEXT: bl __getf2 ; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov w19, w0 @@ -698,11 +695,12 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: csel w19, wzr, w0, lt ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-SD-NEXT: csinv w8, w19, wzr, le +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: mov v0.s[1], w21 +; CHECK-SD-NEXT: ldp x30, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[2], w20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.s[2], w8 ; CHECK-SD-NEXT: add sp, sp, #112 ; CHECK-SD-NEXT: ret ; @@ -4048,46 +4046,44 @@ define <16 x i16> @test_unsigned_v16f16_v16i16(<16 x half> %f) { define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-SD-LABEL: test_unsigned_v8f64_v8i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov d4, v3.d[1] -; CHECK-SD-NEXT: mov d5, v2.d[1] -; CHECK-SD-NEXT: mov w11, #255 // =0xff -; CHECK-SD-NEXT: fcvtzu w9, d3 -; CHECK-SD-NEXT: mov d3, v1.d[1] -; CHECK-SD-NEXT: fcvtzu w12, d2 -; CHECK-SD-NEXT: fcvtzu w14, d1 -; CHECK-SD-NEXT: fcvtzu w8, d4 ; CHECK-SD-NEXT: mov d4, v0.d[1] -; CHECK-SD-NEXT: fcvtzu w10, d5 -; CHECK-SD-NEXT: fcvtzu w13, d3 -; CHECK-SD-NEXT: cmp w8, #255 -; CHECK-SD-NEXT: fcvtzu w15, d4 -; CHECK-SD-NEXT: csel w8, w8, w11, lo -; CHECK-SD-NEXT: cmp w9, #255 -; CHECK-SD-NEXT: csel w9, w9, w11, lo -; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: fmov s4, w9 -; CHECK-SD-NEXT: csel w9, w10, w11, lo -; CHECK-SD-NEXT: cmp w12, #255 ; CHECK-SD-NEXT: fcvtzu w10, d0 -; CHECK-SD-NEXT: mov v4.s[1], w8 -; CHECK-SD-NEXT: csel w8, w12, w11, lo -; CHECK-SD-NEXT: cmp w13, #255 -; CHECK-SD-NEXT: fmov s3, w8 -; CHECK-SD-NEXT: csel w8, w13, w11, lo -; CHECK-SD-NEXT: cmp w14, #255 -; CHECK-SD-NEXT: mov v3.s[1], w9 -; CHECK-SD-NEXT: csel w9, w14, w11, lo -; CHECK-SD-NEXT: cmp w15, #255 -; CHECK-SD-NEXT: fmov s2, w9 -; CHECK-SD-NEXT: csel w9, w15, w11, lo +; CHECK-SD-NEXT: mov w8, #255 // =0xff +; CHECK-SD-NEXT: fcvtzu w11, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] +; CHECK-SD-NEXT: fcvtzu w9, d4 +; CHECK-SD-NEXT: cmp w9, #255 +; CHECK-SD-NEXT: csel w9, w9, w8, lo ; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: mov v2.s[1], w8 -; CHECK-SD-NEXT: csel w8, w10, w11, lo -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: adrp x8, .LCPI82_0 -; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-SD-NEXT: mov v1.s[1], w9 -; CHECK-SD-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b +; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: cmp w11, #255 +; CHECK-SD-NEXT: fmov s0, w10 +; CHECK-SD-NEXT: csel w10, w11, w8, lo +; CHECK-SD-NEXT: mov v0.b[1], w9 +; CHECK-SD-NEXT: fcvtzu w9, d1 +; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: mov v0.b[2], w10 +; CHECK-SD-NEXT: cmp w9, #255 +; CHECK-SD-NEXT: fcvtzu w10, d2 +; CHECK-SD-NEXT: csel w9, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[3], w9 +; CHECK-SD-NEXT: cmp w10, #255 +; CHECK-SD-NEXT: fcvtzu w9, d1 +; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: mov v0.b[4], w10 +; CHECK-SD-NEXT: cmp w9, #255 +; CHECK-SD-NEXT: fcvtzu w10, d3 +; CHECK-SD-NEXT: csel w9, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[5], w9 +; CHECK-SD-NEXT: cmp w10, #255 +; CHECK-SD-NEXT: fcvtzu w9, d1 +; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov v0.b[6], w10 +; CHECK-SD-NEXT: cmp w9, #255 +; CHECK-SD-NEXT: csel w8, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[7], w8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i8: @@ -4120,98 +4116,75 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-SD-NEXT: mov d16, v0.d[1] ; CHECK-SD-NEXT: fcvtzu w10, d0 ; CHECK-SD-NEXT: mov w8, #255 // =0xff +; CHECK-SD-NEXT: fcvtzu w11, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: fcvtzu w9, d16 -; CHECK-SD-NEXT: mov d16, v1.d[1] ; CHECK-SD-NEXT: cmp w9, #255 ; CHECK-SD-NEXT: csel w9, w9, w8, lo ; CHECK-SD-NEXT: cmp w10, #255 ; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: cmp w11, #255 ; CHECK-SD-NEXT: fmov s0, w10 -; CHECK-SD-NEXT: fcvtzu w10, d16 -; CHECK-SD-NEXT: mov d16, v2.d[1] -; CHECK-SD-NEXT: mov v0.s[1], w9 +; CHECK-SD-NEXT: csel w10, w11, w8, lo +; CHECK-SD-NEXT: mov v0.b[1], w9 ; CHECK-SD-NEXT: fcvtzu w9, d1 -; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: mov v0.b[2], w10 ; CHECK-SD-NEXT: cmp w9, #255 -; CHECK-SD-NEXT: mov w11, v0.s[1] -; CHECK-SD-NEXT: csel w9, w9, w8, lo -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: fcvtzu w9, d16 -; CHECK-SD-NEXT: mov d16, v3.d[1] -; CHECK-SD-NEXT: mov v0.b[1], w11 -; CHECK-SD-NEXT: mov v1.s[1], w10 ; CHECK-SD-NEXT: fcvtzu w10, d2 -; CHECK-SD-NEXT: cmp w9, #255 ; CHECK-SD-NEXT: csel w9, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[3], w9 ; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: mov w11, v1.s[1] -; CHECK-SD-NEXT: mov v0.b[2], v1.b[0] -; CHECK-SD-NEXT: csel w10, w10, w8, lo -; CHECK-SD-NEXT: fmov s2, w10 -; CHECK-SD-NEXT: fcvtzu w10, d16 -; CHECK-SD-NEXT: mov d16, v4.d[1] -; CHECK-SD-NEXT: mov v0.b[3], w11 -; CHECK-SD-NEXT: mov v2.s[1], w9 -; CHECK-SD-NEXT: fcvtzu w9, d3 -; CHECK-SD-NEXT: cmp w10, #255 +; CHECK-SD-NEXT: fcvtzu w9, d1 ; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: mov v0.b[4], w10 ; CHECK-SD-NEXT: cmp w9, #255 -; CHECK-SD-NEXT: mov w11, v2.s[1] -; CHECK-SD-NEXT: mov v0.b[4], v2.b[0] +; CHECK-SD-NEXT: fcvtzu w10, d3 ; CHECK-SD-NEXT: csel w9, w9, w8, lo -; CHECK-SD-NEXT: fmov s3, w9 -; CHECK-SD-NEXT: fcvtzu w9, d16 -; CHECK-SD-NEXT: mov d16, v5.d[1] -; CHECK-SD-NEXT: mov v0.b[5], w11 -; CHECK-SD-NEXT: mov v3.s[1], w10 +; CHECK-SD-NEXT: mov v0.b[5], w9 +; CHECK-SD-NEXT: cmp w10, #255 +; CHECK-SD-NEXT: fcvtzu w9, d1 +; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov d1, v4.d[1] +; CHECK-SD-NEXT: mov v0.b[6], w10 +; CHECK-SD-NEXT: cmp w9, #255 ; CHECK-SD-NEXT: fcvtzu w10, d4 -; CHECK-SD-NEXT: cmp w9, #255 ; CHECK-SD-NEXT: csel w9, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[7], w9 ; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: mov w11, v3.s[1] -; CHECK-SD-NEXT: mov v0.b[6], v3.b[0] -; CHECK-SD-NEXT: csel w10, w10, w8, lo -; CHECK-SD-NEXT: fmov s4, w10 -; CHECK-SD-NEXT: fcvtzu w10, d16 -; CHECK-SD-NEXT: mov v0.b[7], w11 -; CHECK-SD-NEXT: mov v4.s[1], w9 -; CHECK-SD-NEXT: fcvtzu w9, d5 -; CHECK-SD-NEXT: mov d5, v6.d[1] -; CHECK-SD-NEXT: cmp w10, #255 +; CHECK-SD-NEXT: fcvtzu w9, d1 ; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov d1, v5.d[1] +; CHECK-SD-NEXT: mov v0.b[8], w10 ; CHECK-SD-NEXT: cmp w9, #255 -; CHECK-SD-NEXT: mov w11, v4.s[1] -; CHECK-SD-NEXT: mov v0.b[8], v4.b[0] +; CHECK-SD-NEXT: fcvtzu w10, d5 ; CHECK-SD-NEXT: csel w9, w9, w8, lo -; CHECK-SD-NEXT: fmov s16, w9 -; CHECK-SD-NEXT: fcvtzu w9, d5 -; CHECK-SD-NEXT: mov d5, v7.d[1] -; CHECK-SD-NEXT: mov v0.b[9], w11 -; CHECK-SD-NEXT: mov v16.s[1], w10 +; CHECK-SD-NEXT: mov v0.b[9], w9 +; CHECK-SD-NEXT: cmp w10, #255 +; CHECK-SD-NEXT: fcvtzu w9, d1 +; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov d1, v6.d[1] +; CHECK-SD-NEXT: mov v0.b[10], w10 +; CHECK-SD-NEXT: cmp w9, #255 ; CHECK-SD-NEXT: fcvtzu w10, d6 -; CHECK-SD-NEXT: cmp w9, #255 ; CHECK-SD-NEXT: csel w9, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[11], w9 ; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: mov v0.b[10], v16.b[0] -; CHECK-SD-NEXT: mov w11, v16.s[1] +; CHECK-SD-NEXT: fcvtzu w9, d1 ; CHECK-SD-NEXT: csel w10, w10, w8, lo -; CHECK-SD-NEXT: fmov s6, w10 -; CHECK-SD-NEXT: fcvtzu w10, d7 -; CHECK-SD-NEXT: mov v0.b[11], w11 -; CHECK-SD-NEXT: mov v6.s[1], w9 -; CHECK-SD-NEXT: fcvtzu w9, d5 +; CHECK-SD-NEXT: mov d1, v7.d[1] +; CHECK-SD-NEXT: mov v0.b[12], w10 ; CHECK-SD-NEXT: cmp w9, #255 -; CHECK-SD-NEXT: mov v0.b[12], v6.b[0] -; CHECK-SD-NEXT: mov w11, v6.s[1] +; CHECK-SD-NEXT: fcvtzu w10, d7 ; CHECK-SD-NEXT: csel w9, w9, w8, lo +; CHECK-SD-NEXT: mov v0.b[13], w9 ; CHECK-SD-NEXT: cmp w10, #255 -; CHECK-SD-NEXT: csel w8, w10, w8, lo -; CHECK-SD-NEXT: fmov s5, w8 -; CHECK-SD-NEXT: mov v0.b[13], w11 -; CHECK-SD-NEXT: mov v5.s[1], w9 -; CHECK-SD-NEXT: mov v0.b[14], v5.b[0] -; CHECK-SD-NEXT: mov w8, v5.s[1] +; CHECK-SD-NEXT: fcvtzu w9, d1 +; CHECK-SD-NEXT: csel w10, w10, w8, lo +; CHECK-SD-NEXT: mov v0.b[14], w10 +; CHECK-SD-NEXT: cmp w9, #255 +; CHECK-SD-NEXT: csel w8, w9, w8, lo ; CHECK-SD-NEXT: mov v0.b[15], w8 ; CHECK-SD-NEXT: ret ; @@ -4257,46 +4230,43 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-SD-LABEL: test_unsigned_v8f64_v8i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov d4, v3.d[1] -; CHECK-SD-NEXT: mov d5, v2.d[1] -; CHECK-SD-NEXT: mov w10, #65535 // =0xffff -; CHECK-SD-NEXT: fcvtzu w9, d3 -; CHECK-SD-NEXT: mov d3, v1.d[1] -; CHECK-SD-NEXT: fcvtzu w12, d2 -; CHECK-SD-NEXT: fcvtzu w14, d1 -; CHECK-SD-NEXT: fcvtzu w8, d4 ; CHECK-SD-NEXT: mov d4, v0.d[1] -; CHECK-SD-NEXT: fcvtzu w11, d5 -; CHECK-SD-NEXT: fcvtzu w13, d3 +; CHECK-SD-NEXT: fcvtzu w9, d0 +; CHECK-SD-NEXT: mov w10, #65535 // =0xffff +; CHECK-SD-NEXT: fcvtzu w11, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] +; CHECK-SD-NEXT: fcvtzu w8, d4 ; CHECK-SD-NEXT: cmp w8, w10 -; CHECK-SD-NEXT: fcvtzu w15, d4 ; CHECK-SD-NEXT: csel w8, w8, w10, lo ; CHECK-SD-NEXT: cmp w9, w10 ; CHECK-SD-NEXT: csel w9, w9, w10, lo ; CHECK-SD-NEXT: cmp w11, w10 -; CHECK-SD-NEXT: fmov s4, w9 +; CHECK-SD-NEXT: fmov s0, w9 ; CHECK-SD-NEXT: csel w9, w11, w10, lo -; CHECK-SD-NEXT: cmp w12, w10 -; CHECK-SD-NEXT: fcvtzu w11, d0 -; CHECK-SD-NEXT: mov v4.s[1], w8 -; CHECK-SD-NEXT: csel w8, w12, w10, lo -; CHECK-SD-NEXT: cmp w13, w10 -; CHECK-SD-NEXT: fmov s3, w8 -; CHECK-SD-NEXT: csel w8, w13, w10, lo -; CHECK-SD-NEXT: cmp w14, w10 -; CHECK-SD-NEXT: mov v3.s[1], w9 -; CHECK-SD-NEXT: csel w9, w14, w10, lo -; CHECK-SD-NEXT: cmp w15, w10 -; CHECK-SD-NEXT: fmov s2, w9 -; CHECK-SD-NEXT: csel w9, w15, w10, lo -; CHECK-SD-NEXT: cmp w11, w10 -; CHECK-SD-NEXT: mov v2.s[1], w8 -; CHECK-SD-NEXT: csel w8, w11, w10, lo -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: adrp x8, .LCPI84_0 -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-SD-NEXT: mov v1.s[1], w9 -; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b +; CHECK-SD-NEXT: mov v0.h[1], w8 +; CHECK-SD-NEXT: fcvtzu w8, d1 +; CHECK-SD-NEXT: mov d1, v2.d[1] +; CHECK-SD-NEXT: mov v0.h[2], w9 +; CHECK-SD-NEXT: cmp w8, w10 +; CHECK-SD-NEXT: fcvtzu w9, d2 +; CHECK-SD-NEXT: csel w8, w8, w10, lo +; CHECK-SD-NEXT: mov v0.h[3], w8 +; CHECK-SD-NEXT: cmp w9, w10 +; CHECK-SD-NEXT: fcvtzu w8, d1 +; CHECK-SD-NEXT: csel w9, w9, w10, lo +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: mov v0.h[4], w9 +; CHECK-SD-NEXT: cmp w8, w10 +; CHECK-SD-NEXT: fcvtzu w9, d3 +; CHECK-SD-NEXT: csel w8, w8, w10, lo +; CHECK-SD-NEXT: mov v0.h[5], w8 +; CHECK-SD-NEXT: cmp w9, w10 +; CHECK-SD-NEXT: fcvtzu w8, d1 +; CHECK-SD-NEXT: csel w9, w9, w10, lo +; CHECK-SD-NEXT: mov v0.h[6], w9 +; CHECK-SD-NEXT: cmp w8, w10 +; CHECK-SD-NEXT: csel w8, w8, w10, lo +; CHECK-SD-NEXT: mov v0.h[7], w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i16: @@ -4325,83 +4295,79 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-SD-LABEL: test_unsigned_v16f64_v16i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov d16, v3.d[1] -; CHECK-SD-NEXT: mov d17, v2.d[1] +; CHECK-SD-NEXT: mov d16, v0.d[1] +; CHECK-SD-NEXT: mov d17, v1.d[1] ; CHECK-SD-NEXT: mov w8, #65535 // =0xffff -; CHECK-SD-NEXT: fcvtzu w9, d3 -; CHECK-SD-NEXT: mov d3, v1.d[1] -; CHECK-SD-NEXT: fcvtzu w10, d1 -; CHECK-SD-NEXT: mov d1, v0.d[1] -; CHECK-SD-NEXT: fcvtzu w11, d2 -; CHECK-SD-NEXT: fcvtzu w12, d0 -; CHECK-SD-NEXT: mov d0, v7.d[1] -; CHECK-SD-NEXT: mov d2, v6.d[1] -; CHECK-SD-NEXT: fcvtzu w14, d7 -; CHECK-SD-NEXT: fcvtzu w13, d16 -; CHECK-SD-NEXT: fcvtzu w16, d17 -; CHECK-SD-NEXT: fcvtzu w15, d6 -; CHECK-SD-NEXT: fcvtzu w17, d3 -; CHECK-SD-NEXT: mov d6, v5.d[1] -; CHECK-SD-NEXT: mov d3, v4.d[1] -; CHECK-SD-NEXT: fcvtzu w18, d1 +; CHECK-SD-NEXT: fcvtzu w10, d0 +; CHECK-SD-NEXT: mov d0, v2.d[1] +; CHECK-SD-NEXT: fcvtzu w11, d1 +; CHECK-SD-NEXT: mov d1, v3.d[1] +; CHECK-SD-NEXT: fcvtzu w13, d2 +; CHECK-SD-NEXT: mov d2, v4.d[1] +; CHECK-SD-NEXT: fcvtzu w18, d4 +; CHECK-SD-NEXT: fcvtzu w9, d16 +; CHECK-SD-NEXT: fcvtzu w12, d17 +; CHECK-SD-NEXT: fcvtzu w16, d1 +; CHECK-SD-NEXT: fcvtzu w17, d2 +; CHECK-SD-NEXT: mov d2, v5.d[1] +; CHECK-SD-NEXT: cmp w9, w8 +; CHECK-SD-NEXT: csel w14, w9, w8, lo +; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: fcvtzu w9, d0 +; CHECK-SD-NEXT: csel w15, w10, w8, lo +; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: fcvtzu w10, d3 +; CHECK-SD-NEXT: csel w11, w11, w8, lo +; CHECK-SD-NEXT: cmp w12, w8 +; CHECK-SD-NEXT: fmov s0, w15 +; CHECK-SD-NEXT: csel w12, w12, w8, lo ; CHECK-SD-NEXT: cmp w13, w8 ; CHECK-SD-NEXT: csel w13, w13, w8, lo ; CHECK-SD-NEXT: cmp w9, w8 -; CHECK-SD-NEXT: csel w9, w9, w8, lo -; CHECK-SD-NEXT: cmp w16, w8 -; CHECK-SD-NEXT: fmov s19, w9 -; CHECK-SD-NEXT: csel w9, w16, w8, lo -; CHECK-SD-NEXT: cmp w11, w8 -; CHECK-SD-NEXT: fcvtzu w16, d0 -; CHECK-SD-NEXT: csel w11, w11, w8, lo -; CHECK-SD-NEXT: cmp w17, w8 -; CHECK-SD-NEXT: mov v19.s[1], w13 -; CHECK-SD-NEXT: csel w13, w17, w8, lo +; CHECK-SD-NEXT: csel w0, w9, w8, lo ; CHECK-SD-NEXT: cmp w10, w8 +; CHECK-SD-NEXT: mov v0.h[1], w14 ; CHECK-SD-NEXT: csel w10, w10, w8, lo -; CHECK-SD-NEXT: cmp w18, w8 -; CHECK-SD-NEXT: fmov s18, w11 -; CHECK-SD-NEXT: csel w11, w18, w8, lo -; CHECK-SD-NEXT: cmp w12, w8 -; CHECK-SD-NEXT: fcvtzu w17, d2 -; CHECK-SD-NEXT: csel w12, w12, w8, lo ; CHECK-SD-NEXT: cmp w16, w8 -; CHECK-SD-NEXT: fcvtzu w18, d6 -; CHECK-SD-NEXT: mov v18.s[1], w9 +; CHECK-SD-NEXT: fcvtzu w14, d2 ; CHECK-SD-NEXT: csel w9, w16, w8, lo -; CHECK-SD-NEXT: cmp w14, w8 -; CHECK-SD-NEXT: fmov s17, w10 -; CHECK-SD-NEXT: csel w10, w14, w8, lo -; CHECK-SD-NEXT: fcvtzu w16, d5 -; CHECK-SD-NEXT: fmov s23, w10 ; CHECK-SD-NEXT: cmp w17, w8 -; CHECK-SD-NEXT: fcvtzu w14, d3 -; CHECK-SD-NEXT: csel w10, w17, w8, lo -; CHECK-SD-NEXT: cmp w15, w8 -; CHECK-SD-NEXT: fcvtzu w17, d4 -; CHECK-SD-NEXT: mov v17.s[1], w13 -; CHECK-SD-NEXT: mov v23.s[1], w9 -; CHECK-SD-NEXT: csel w9, w15, w8, lo +; CHECK-SD-NEXT: mov d2, v6.d[1] +; CHECK-SD-NEXT: csel w16, w17, w8, lo ; CHECK-SD-NEXT: cmp w18, w8 -; CHECK-SD-NEXT: fmov s22, w9 -; CHECK-SD-NEXT: csel w9, w18, w8, lo -; CHECK-SD-NEXT: cmp w16, w8 -; CHECK-SD-NEXT: fmov s16, w12 -; CHECK-SD-NEXT: mov v22.s[1], w10 -; CHECK-SD-NEXT: csel w10, w16, w8, lo +; CHECK-SD-NEXT: csel w17, w18, w8, lo +; CHECK-SD-NEXT: fcvtzu w18, d5 +; CHECK-SD-NEXT: mov v0.h[2], w11 +; CHECK-SD-NEXT: fmov s1, w17 +; CHECK-SD-NEXT: fcvtzu w11, d6 +; CHECK-SD-NEXT: mov v1.h[1], w16 +; CHECK-SD-NEXT: cmp w18, w8 +; CHECK-SD-NEXT: mov v0.h[3], w12 +; CHECK-SD-NEXT: csel w15, w18, w8, lo ; CHECK-SD-NEXT: cmp w14, w8 -; CHECK-SD-NEXT: fmov s21, w10 -; CHECK-SD-NEXT: csel w10, w14, w8, lo -; CHECK-SD-NEXT: cmp w17, w8 -; CHECK-SD-NEXT: csel w8, w17, w8, lo -; CHECK-SD-NEXT: mov v16.s[1], w11 -; CHECK-SD-NEXT: mov v21.s[1], w9 -; CHECK-SD-NEXT: fmov s20, w8 -; CHECK-SD-NEXT: adrp x8, .LCPI85_0 -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] -; CHECK-SD-NEXT: mov v20.s[1], w10 -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: fcvtzu w12, d2 +; CHECK-SD-NEXT: csel w14, w14, w8, lo +; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: mov d2, v7.d[1] +; CHECK-SD-NEXT: csel w11, w11, w8, lo +; CHECK-SD-NEXT: mov v1.h[2], w15 +; CHECK-SD-NEXT: mov v0.h[4], w13 +; CHECK-SD-NEXT: cmp w12, w8 +; CHECK-SD-NEXT: csel w12, w12, w8, lo +; CHECK-SD-NEXT: mov v1.h[3], w14 +; CHECK-SD-NEXT: mov v0.h[5], w0 +; CHECK-SD-NEXT: mov v1.h[4], w11 +; CHECK-SD-NEXT: fcvtzu w11, d7 +; CHECK-SD-NEXT: mov v0.h[6], w10 +; CHECK-SD-NEXT: mov v1.h[5], w12 +; CHECK-SD-NEXT: cmp w11, w8 +; CHECK-SD-NEXT: fcvtzu w12, d2 +; CHECK-SD-NEXT: csel w11, w11, w8, lo +; CHECK-SD-NEXT: mov v0.h[7], w9 +; CHECK-SD-NEXT: mov v1.h[6], w11 +; CHECK-SD-NEXT: cmp w12, w8 +; CHECK-SD-NEXT: csel w8, w12, w8, lo +; CHECK-SD-NEXT: mov v1.h[7], w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_unsigned_v16f64_v16i16: diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index b4c38e9f2df3..1f84c944d7c1 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -304,25 +304,15 @@ entry: } define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) { -; CHECK-SD-LABEL: fptrunc_v3f64_v3f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvt h1, d1 -; CHECK-SD-NEXT: fcvt h0, d0 -; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: fcvt h1, d2 -; CHECK-SD-NEXT: mov v0.h[2], v1.h[0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptrunc_v3f64_v3f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fcvt h0, d0 -; CHECK-GI-NEXT: fcvt h1, d1 -; CHECK-GI-NEXT: fcvt h2, d2 -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptrunc_v3f64_v3f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: fcvt h1, d1 +; CHECK-NEXT: fcvt h2, d2 +; CHECK-NEXT: mov v0.h[1], v1.h[0] +; CHECK-NEXT: mov v0.h[2], v2.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %c = fptrunc <3 x double> %a to <3 x half> ret <3 x half> %c diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index 71b53c662bb2..9d4d654259a3 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -4143,11 +4143,11 @@ entry: define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) { ; CHECK-SD-LABEL: stofp_v3i128_v3f32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: sub sp, sp, #80 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NEXT: .cfi_offset w21, -24 @@ -4155,31 +4155,31 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NEXT: mov x21, x1 ; CHECK-SD-NEXT: mov x22, x0 -; CHECK-SD-NEXT: mov x0, x2 -; CHECK-SD-NEXT: mov x1, x3 -; CHECK-SD-NEXT: mov x19, x5 -; CHECK-SD-NEXT: mov x20, x4 +; CHECK-SD-NEXT: mov x0, x4 +; CHECK-SD-NEXT: mov x1, x5 +; CHECK-SD-NEXT: mov x19, x3 +; CHECK-SD-NEXT: mov x20, x2 ; CHECK-SD-NEXT: bl __floattisf ; CHECK-SD-NEXT: mov x0, x22 ; CHECK-SD-NEXT: mov x1, x21 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __floattisf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov x0, x20 ; CHECK-SD-NEXT: mov x1, x19 -; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __floattisf ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v1.s[1], v0.s[0] +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: add sp, sp, #80 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: stofp_v3i128_v3f32: @@ -4227,11 +4227,11 @@ entry: define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) { ; CHECK-SD-LABEL: utofp_v3i128_v3f32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: sub sp, sp, #80 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NEXT: .cfi_offset w21, -24 @@ -4239,31 +4239,31 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NEXT: mov x21, x1 ; CHECK-SD-NEXT: mov x22, x0 -; CHECK-SD-NEXT: mov x0, x2 -; CHECK-SD-NEXT: mov x1, x3 -; CHECK-SD-NEXT: mov x19, x5 -; CHECK-SD-NEXT: mov x20, x4 +; CHECK-SD-NEXT: mov x0, x4 +; CHECK-SD-NEXT: mov x1, x5 +; CHECK-SD-NEXT: mov x19, x3 +; CHECK-SD-NEXT: mov x20, x2 ; CHECK-SD-NEXT: bl __floatuntisf ; CHECK-SD-NEXT: mov x0, x22 ; CHECK-SD-NEXT: mov x1, x21 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __floatuntisf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov x0, x20 ; CHECK-SD-NEXT: mov x1, x19 -; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-SD-NEXT: bl __floatuntisf ; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v1.s[1], v0.s[0] +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: add sp, sp, #80 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: utofp_v3i128_v3f32: @@ -6035,11 +6035,11 @@ entry: define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) { ; CHECK-SD-NOFP16-LABEL: stofp_v3i128_v3f16: ; CHECK-SD-NOFP16: // %bb.0: // %entry -; CHECK-SD-NOFP16-NEXT: sub sp, sp, #64 -; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NOFP16-NEXT: sub sp, sp, #80 +; CHECK-SD-NOFP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w21, -24 @@ -6047,40 +6047,41 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) { ; CHECK-SD-NOFP16-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NOFP16-NEXT: mov x21, x1 ; CHECK-SD-NOFP16-NEXT: mov x22, x0 -; CHECK-SD-NOFP16-NEXT: mov x0, x2 -; CHECK-SD-NOFP16-NEXT: mov x1, x3 -; CHECK-SD-NOFP16-NEXT: mov x19, x5 -; CHECK-SD-NOFP16-NEXT: mov x20, x4 +; CHECK-SD-NOFP16-NEXT: mov x0, x4 +; CHECK-SD-NOFP16-NEXT: mov x1, x5 +; CHECK-SD-NOFP16-NEXT: mov x19, x3 +; CHECK-SD-NOFP16-NEXT: mov x20, x2 ; CHECK-SD-NOFP16-NEXT: bl __floattisf ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 ; CHECK-SD-NOFP16-NEXT: mov x0, x22 ; CHECK-SD-NOFP16-NEXT: mov x1, x21 +; CHECK-SD-NOFP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: bl __floattisf +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov x0, x20 +; CHECK-SD-NOFP16-NEXT: mov x1, x19 ; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-SD-NOFP16-NEXT: bl __floattisf ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 ; CHECK-SD-NOFP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: mov x0, x20 -; CHECK-SD-NOFP16-NEXT: mov x1, x19 -; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: bl __floattisf -; CHECK-SD-NOFP16-NEXT: fcvt h1, s0 -; CHECK-SD-NOFP16-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0] +; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v0.h[0] +; CHECK-SD-NOFP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NOFP16-NEXT: add sp, sp, #64 +; CHECK-SD-NOFP16-NEXT: add sp, sp, #80 ; CHECK-SD-NOFP16-NEXT: ret ; ; CHECK-SD-FP16-LABEL: stofp_v3i128_v3f16: ; CHECK-SD-FP16: // %bb.0: // %entry -; CHECK-SD-FP16-NEXT: sub sp, sp, #64 -; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-FP16-NEXT: sub sp, sp, #80 +; CHECK-SD-FP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-FP16-NEXT: .cfi_offset w19, -8 ; CHECK-SD-FP16-NEXT: .cfi_offset w20, -16 ; CHECK-SD-FP16-NEXT: .cfi_offset w21, -24 @@ -6088,31 +6089,32 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) { ; CHECK-SD-FP16-NEXT: .cfi_offset w30, -48 ; CHECK-SD-FP16-NEXT: mov x21, x1 ; CHECK-SD-FP16-NEXT: mov x22, x0 -; CHECK-SD-FP16-NEXT: mov x0, x2 -; CHECK-SD-FP16-NEXT: mov x1, x3 -; CHECK-SD-FP16-NEXT: mov x19, x5 -; CHECK-SD-FP16-NEXT: mov x20, x4 +; CHECK-SD-FP16-NEXT: mov x0, x4 +; CHECK-SD-FP16-NEXT: mov x1, x5 +; CHECK-SD-FP16-NEXT: mov x19, x3 +; CHECK-SD-FP16-NEXT: mov x20, x2 ; CHECK-SD-FP16-NEXT: bl __floattihf ; CHECK-SD-FP16-NEXT: mov x0, x22 ; CHECK-SD-FP16-NEXT: mov x1, x21 ; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-FP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-SD-FP16-NEXT: bl __floattihf -; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-SD-FP16-NEXT: mov x0, x20 ; CHECK-SD-FP16-NEXT: mov x1, x19 -; CHECK-SD-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-SD-FP16-NEXT: bl __floattihf ; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-FP16-NEXT: mov v1.h[1], v0.h[0] +; CHECK-SD-FP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-FP16-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-FP16-NEXT: fmov d0, d1 -; CHECK-SD-FP16-NEXT: add sp, sp, #64 +; CHECK-SD-FP16-NEXT: mov v0.16b, v1.16b +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-FP16-NEXT: add sp, sp, #80 ; CHECK-SD-FP16-NEXT: ret ; ; CHECK-GI-NOFP16-LABEL: stofp_v3i128_v3f16: @@ -6200,11 +6202,11 @@ entry: define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) { ; CHECK-SD-NOFP16-LABEL: utofp_v3i128_v3f16: ; CHECK-SD-NOFP16: // %bb.0: // %entry -; CHECK-SD-NOFP16-NEXT: sub sp, sp, #64 -; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NOFP16-NEXT: sub sp, sp, #80 +; CHECK-SD-NOFP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w21, -24 @@ -6212,40 +6214,41 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) { ; CHECK-SD-NOFP16-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NOFP16-NEXT: mov x21, x1 ; CHECK-SD-NOFP16-NEXT: mov x22, x0 -; CHECK-SD-NOFP16-NEXT: mov x0, x2 -; CHECK-SD-NOFP16-NEXT: mov x1, x3 -; CHECK-SD-NOFP16-NEXT: mov x19, x5 -; CHECK-SD-NOFP16-NEXT: mov x20, x4 +; CHECK-SD-NOFP16-NEXT: mov x0, x4 +; CHECK-SD-NOFP16-NEXT: mov x1, x5 +; CHECK-SD-NOFP16-NEXT: mov x19, x3 +; CHECK-SD-NOFP16-NEXT: mov x20, x2 ; CHECK-SD-NOFP16-NEXT: bl __floatuntisf ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 ; CHECK-SD-NOFP16-NEXT: mov x0, x22 ; CHECK-SD-NOFP16-NEXT: mov x1, x21 +; CHECK-SD-NOFP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: bl __floatuntisf +; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 +; CHECK-SD-NOFP16-NEXT: mov x0, x20 +; CHECK-SD-NOFP16-NEXT: mov x1, x19 ; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-SD-NOFP16-NEXT: bl __floatuntisf ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0 ; CHECK-SD-NOFP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: mov x0, x20 -; CHECK-SD-NOFP16-NEXT: mov x1, x19 -; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NOFP16-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: bl __floatuntisf -; CHECK-SD-NOFP16-NEXT: fcvt h1, s0 -; CHECK-SD-NOFP16-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v1.h[0] +; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: mov v1.h[1], v0.h[0] +; CHECK-SD-NOFP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: mov v1.h[2], v0.h[0] +; CHECK-SD-NOFP16-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NOFP16-NEXT: add sp, sp, #64 +; CHECK-SD-NOFP16-NEXT: add sp, sp, #80 ; CHECK-SD-NOFP16-NEXT: ret ; ; CHECK-SD-FP16-LABEL: utofp_v3i128_v3f16: ; CHECK-SD-FP16: // %bb.0: // %entry -; CHECK-SD-FP16-NEXT: sub sp, sp, #64 -; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-FP16-NEXT: sub sp, sp, #80 +; CHECK-SD-FP16-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-FP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-FP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-FP16-NEXT: .cfi_offset w19, -8 ; CHECK-SD-FP16-NEXT: .cfi_offset w20, -16 ; CHECK-SD-FP16-NEXT: .cfi_offset w21, -24 @@ -6253,31 +6256,32 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) { ; CHECK-SD-FP16-NEXT: .cfi_offset w30, -48 ; CHECK-SD-FP16-NEXT: mov x21, x1 ; CHECK-SD-FP16-NEXT: mov x22, x0 -; CHECK-SD-FP16-NEXT: mov x0, x2 -; CHECK-SD-FP16-NEXT: mov x1, x3 -; CHECK-SD-FP16-NEXT: mov x19, x5 -; CHECK-SD-FP16-NEXT: mov x20, x4 +; CHECK-SD-FP16-NEXT: mov x0, x4 +; CHECK-SD-FP16-NEXT: mov x1, x5 +; CHECK-SD-FP16-NEXT: mov x19, x3 +; CHECK-SD-FP16-NEXT: mov x20, x2 ; CHECK-SD-FP16-NEXT: bl __floatuntihf ; CHECK-SD-FP16-NEXT: mov x0, x22 ; CHECK-SD-FP16-NEXT: mov x1, x21 ; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-FP16-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-SD-FP16-NEXT: bl __floatuntihf -; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-SD-FP16-NEXT: mov x0, x20 ; CHECK-SD-FP16-NEXT: mov x1, x19 -; CHECK-SD-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-SD-FP16-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-SD-FP16-NEXT: bl __floatuntihf ; CHECK-SD-FP16-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-SD-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-FP16-NEXT: mov v1.h[1], v0.h[0] +; CHECK-SD-FP16-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-FP16-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-FP16-NEXT: fmov d0, d1 -; CHECK-SD-FP16-NEXT: add sp, sp, #64 +; CHECK-SD-FP16-NEXT: mov v0.16b, v1.16b +; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-FP16-NEXT: add sp, sp, #80 ; CHECK-SD-FP16-NEXT: ret ; ; CHECK-GI-NOFP16-LABEL: utofp_v3i128_v3f16: diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll index ce5b80f3e2be..2f193cefd7c0 100644 --- a/llvm/test/CodeGen/AArch64/sext.ll +++ b/llvm/test/CodeGen/AArch64/sext.ll @@ -1245,12 +1245,10 @@ entry: define <2 x i128> @sext_v2i64_v2i128(<2 x i64> %a) { ; CHECK-SD-LABEL: sext_v2i64_v2i128: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov x8, v0.d[1] -; CHECK-SD-NEXT: dup v1.2d, v0.d[1] +; CHECK-SD-NEXT: mov x2, v0.d[1] ; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: fmov x2, d1 ; CHECK-SD-NEXT: asr x1, x0, #63 -; CHECK-SD-NEXT: asr x3, x8, #63 +; CHECK-SD-NEXT: asr x3, x2, #63 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sext_v2i64_v2i128: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll index ffef6f74f2d3..e4eda2e3a1e3 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll @@ -100,22 +100,17 @@ define void @bitcast_v2i16(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: bitcast_v2i16: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #32 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 -; NONEON-NOSVE-NEXT: ldrh w8, [x0, #2] -; NONEON-NOSVE-NEXT: str w8, [sp, #4] +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 ; NONEON-NOSVE-NEXT: ldrh w8, [x0] -; NONEON-NOSVE-NEXT: str w8, [sp] +; NONEON-NOSVE-NEXT: ldrh w9, [x0, #2] +; NONEON-NOSVE-NEXT: strh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp] ; NONEON-NOSVE-NEXT: ldr d0, [sp] ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] -; NONEON-NOSVE-NEXT: strh w9, [sp, #18] -; NONEON-NOSVE-NEXT: strh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] -; NONEON-NOSVE-NEXT: str d0, [sp, #24] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] ; NONEON-NOSVE-NEXT: str w8, [x1] -; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: add sp, sp, #16 ; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i16>, ptr %a %cast = bitcast <2 x i16> %load to <2 x half> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll index 7d6336a43a4f..4d524bc848de 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll @@ -148,75 +148,38 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap) { ; ; NONEON-NOSVE-LABEL: load_sext_v16i8i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldr q0, [x0] -; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] -; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] -; NONEON-NOSVE-NEXT: strh w8, [sp, #60] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] -; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] -; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] -; NONEON-NOSVE-NEXT: strh w8, [sp, #52] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] -; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] -; NONEON-NOSVE-NEXT: strh w8, [sp, #44] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] -; NONEON-NOSVE-NEXT: strh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] -; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] -; NONEON-NOSVE-NEXT: strh w8, [sp, #36] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] -; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] -; NONEON-NOSVE-NEXT: strh w8, [sp, #32] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i8>, ptr %ap %val = sext <16 x i8> %a to <16 x i32> @@ -291,18 +254,12 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) { ; NONEON-NOSVE-LABEL: load_sext_v4i32i256: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: ldr q0, [x0] -; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #24] -; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48] -; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #16] -; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #32] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] -; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #64] -; NONEON-NOSVE-NEXT: ldp x11, x9, [sp, #64] -; NONEON-NOSVE-NEXT: ldp x12, x13, [sp, #80] +; NONEON-NOSVE-NEXT: ldpsw x11, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldpsw x12, x13, [sp, #24] ; NONEON-NOSVE-NEXT: asr x10, x9, #63 ; NONEON-NOSVE-NEXT: asr x14, x11, #63 ; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #112] @@ -315,7 +272,7 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) { ; NONEON-NOSVE-NEXT: stp x13, x9, [x8, #32] ; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #16] ; NONEON-NOSVE-NEXT: stp x12, x10, [x8] -; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: add sp, sp, #32 ; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = sext <4 x i32> %a to <4 x i256> @@ -327,52 +284,28 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: mov z1.d, z0.d[1] -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: asr x8, x8, #63 -; CHECK-NEXT: fmov d3, x8 -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: asr x9, x9, #63 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: zip1 z0.d, z0.d, z3.d -; CHECK-NEXT: mov z3.d, x9 -; CHECK-NEXT: fmov x2, d2 -; CHECK-NEXT: zip1 z1.d, z1.d, z4.d -; CHECK-NEXT: mov z4.d, z2.d[1] -; CHECK-NEXT: mov z5.d, z0.d[1] -; CHECK-NEXT: mov z6.d, z3.d[1] ; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: fmov x6, d3 -; CHECK-NEXT: mov z2.d, z1.d[1] -; CHECK-NEXT: fmov x3, d4 -; CHECK-NEXT: fmov x1, d5 ; CHECK-NEXT: fmov x4, d1 -; CHECK-NEXT: fmov x7, d6 -; CHECK-NEXT: fmov x5, d2 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: mov x2, x1 +; CHECK-NEXT: mov x3, x1 +; CHECK-NEXT: asr x5, x4, #63 +; CHECK-NEXT: mov x6, x5 +; CHECK-NEXT: mov x7, x5 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: load_sext_v2i64i256: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #144 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 ; NONEON-NOSVE-NEXT: ldr q0, [x0] -; NONEON-NOSVE-NEXT: str q0, [sp] -; NONEON-NOSVE-NEXT: ldp x8, x10, [sp] -; NONEON-NOSVE-NEXT: asr x9, x8, #63 -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] -; NONEON-NOSVE-NEXT: asr x8, x10, #63 -; NONEON-NOSVE-NEXT: stp x9, x9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] -; NONEON-NOSVE-NEXT: stp x10, x8, [sp, #64] -; NONEON-NOSVE-NEXT: stp x8, x8, [sp, #48] -; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #80] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] -; NONEON-NOSVE-NEXT: ldp x0, x1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp x2, x3, [sp, #80] -; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #112] -; NONEON-NOSVE-NEXT: ldp x4, x5, [sp, #128] -; NONEON-NOSVE-NEXT: ldp x6, x7, [sp, #112] -; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x0, x4, [sp], #16 +; NONEON-NOSVE-NEXT: asr x1, x0, #63 +; NONEON-NOSVE-NEXT: asr x5, x4, #63 +; NONEON-NOSVE-NEXT: mov x2, x1 +; NONEON-NOSVE-NEXT: mov x3, x1 +; NONEON-NOSVE-NEXT: mov x6, x5 +; NONEON-NOSVE-NEXT: mov x7, x5 ; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = sext <2 x i64> %a to <2 x i256> @@ -410,88 +343,51 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap) { ; ; NONEON-NOSVE-LABEL: load_zext_v16i16i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #336 -; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] -; NONEON-NOSVE-NEXT: str wzr, [sp, #316] -; NONEON-NOSVE-NEXT: str wzr, [sp, #308] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload -; NONEON-NOSVE-NEXT: str wzr, [sp, #300] ; NONEON-NOSVE-NEXT: stp q0, q1, [sp] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] -; NONEON-NOSVE-NEXT: str wzr, [sp, #292] -; NONEON-NOSVE-NEXT: str wzr, [sp, #284] -; NONEON-NOSVE-NEXT: str wzr, [sp, #276] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] -; NONEON-NOSVE-NEXT: str wzr, [sp, #268] -; NONEON-NOSVE-NEXT: str wzr, [sp, #260] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #120] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #104] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #184] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] -; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #176] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #168] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: str d1, [sp, #328] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #248] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #240] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] -; NONEON-NOSVE-NEXT: str d0, [sp, #168] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #216] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #208] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #192] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #200] -; NONEON-NOSVE-NEXT: str w8, [sp, #312] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #192] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] -; NONEON-NOSVE-NEXT: str w9, [sp, #296] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] -; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #288] -; NONEON-NOSVE-NEXT: str w9, [sp, #280] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] -; NONEON-NOSVE-NEXT: str w9, [sp, #264] -; NONEON-NOSVE-NEXT: str w8, [sp, #256] -; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #256] -; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #160] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #160] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #152] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #136] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128] +; NONEON-NOSVE-NEXT: add sp, sp, #192 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %ap %val = zext <16 x i16> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll index c96189b96026..56149e99b15f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll @@ -440,25 +440,20 @@ define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) { ; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: ldr w8, [x0] -; NONEON-NOSVE-NEXT: str w8, [sp, #-48]! -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: str w8, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 ; NONEON-NOSVE-NEXT: ldr d0, [sp] ; NONEON-NOSVE-NEXT: str d0, [sp, #8] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] -; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] -; NONEON-NOSVE-NEXT: str d0, [sp, #24] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] ; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] ; NONEON-NOSVE-NEXT: str q0, [x1] -; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: add sp, sp, #32 ; NONEON-NOSVE-NEXT: ret %op1 = load <2 x half>, ptr %a %res = fpext <2 x half> %op1 to <2 x double> @@ -480,35 +475,27 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: ldr d0, [x0] ; NONEON-NOSVE-NEXT: str d0, [sp, #8] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] ; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fpext <4 x half> %op1 to <4 x double> @@ -537,61 +524,44 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldr q0, [x0] -; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #92] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #88] ; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #84] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #80] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #76] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #72] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #68] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #64] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fpext <8 x half> %op1 to <8 x double> @@ -637,115 +607,79 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #336 -; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q0, q1, [sp] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] -; NONEON-NOSVE-NEXT: fcvt s1, h0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] -; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] -; NONEON-NOSVE-NEXT: fcvt s1, h0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] -; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #66] -; NONEON-NOSVE-NEXT: fcvt s1, h0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #128] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #70] -; NONEON-NOSVE-NEXT: fcvt s1, h0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #68] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] -; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt d1, s0 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] ; NONEON-NOSVE-NEXT: fcvt s0, h0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] -; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] -; NONEON-NOSVE-NEXT: str d1, [sp, #328] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] -; NONEON-NOSVE-NEXT: str d0, [sp, #168] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #164] -; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #160] ; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #156] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #160] +; NONEON-NOSVE-NEXT: fcvt s0, h0 ; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #152] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 ; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #148] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 ; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #144] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 ; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #140] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #136] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #332] -; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #328] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #188] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #184] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #180] -; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #176] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #172] -; NONEON-NOSVE-NEXT: fcvt d1, s0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #168] -; NONEON-NOSVE-NEXT: fcvt d0, s0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] -; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #128] ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: add sp, sp, #192 ; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x double> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll index 40c8ab27c0b0..75911e5ff156 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -30,50 +30,32 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v8i1_v8i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] -; NONEON-NOSVE-NEXT: strh w8, [sp, #22] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] -; NONEON-NOSVE-NEXT: strh w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] -; NONEON-NOSVE-NEXT: strh w8, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] -; NONEON-NOSVE-NEXT: strh w8, [sp, #16] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] -; NONEON-NOSVE-NEXT: strh w8, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] -; NONEON-NOSVE-NEXT: strh w8, [sp, #28] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] -; NONEON-NOSVE-NEXT: strh w8, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] -; NONEON-NOSVE-NEXT: strh w8, [sp, #24] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #32] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] -; NONEON-NOSVE-NEXT: ldrh w10, [sp, #44] -; NONEON-NOSVE-NEXT: ldrh w12, [sp, #42] -; NONEON-NOSVE-NEXT: ldrh w14, [sp, #40] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] -; NONEON-NOSVE-NEXT: ldrh w11, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] ; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 ; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 -; NONEON-NOSVE-NEXT: ldrh w13, [sp, #36] -; NONEON-NOSVE-NEXT: ldrh w15, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #15] ; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 ; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 -; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #40] ; NONEON-NOSVE-NEXT: sbfx w8, w14, #0, #1 ; NONEON-NOSVE-NEXT: sbfx w10, w15, #0, #1 -; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #32] ; NONEON-NOSVE-NEXT: sbfx w12, w13, #0, #1 ; NONEON-NOSVE-NEXT: sbfx w8, w11, #0, #1 -; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #56] -; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #24] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %b = sext <8 x i1> %a to <8 x i32> store <8 x i32> %b, ptr %out @@ -206,14 +188,14 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v32i8_v32i16: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #272 -; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 +; NONEON-NOSVE-NEXT: sub sp, sp, #208 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 ; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 ; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 @@ -230,182 +212,146 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] -; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] -; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] -; NONEON-NOSVE-NEXT: strb w8, [sp, #60] -; NONEON-NOSVE-NEXT: add w8, w29, w29 -; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] -; NONEON-NOSVE-NEXT: strb w8, [sp, #58] -; NONEON-NOSVE-NEXT: add w8, w27, w27 -; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] -; NONEON-NOSVE-NEXT: strb w8, [sp, #56] -; NONEON-NOSVE-NEXT: add w8, w25, w25 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strb w8, [sp, #54] -; NONEON-NOSVE-NEXT: add w8, w23, w23 -; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] -; NONEON-NOSVE-NEXT: strb w8, [sp, #52] -; NONEON-NOSVE-NEXT: add w8, w21, w21 -; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] -; NONEON-NOSVE-NEXT: strb w8, [sp, #50] -; NONEON-NOSVE-NEXT: add w8, w19, w19 ; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] -; NONEON-NOSVE-NEXT: strb w9, [sp, #59] -; NONEON-NOSVE-NEXT: add w9, w28, w28 -; NONEON-NOSVE-NEXT: add w18, w16, w16 -; NONEON-NOSVE-NEXT: strb w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w8, w6, w6 -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] -; NONEON-NOSVE-NEXT: strb w9, [sp, #57] -; NONEON-NOSVE-NEXT: add w9, w26, w26 -; NONEON-NOSVE-NEXT: strb w8, [sp, #78] -; NONEON-NOSVE-NEXT: add w8, w4, w4 -; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] -; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] -; NONEON-NOSVE-NEXT: strb w9, [sp, #55] -; NONEON-NOSVE-NEXT: add w9, w24, w24 -; NONEON-NOSVE-NEXT: strb w8, [sp, #76] -; NONEON-NOSVE-NEXT: add w8, w2, w2 -; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: add w17, w17, w17 -; NONEON-NOSVE-NEXT: strb w9, [sp, #53] -; NONEON-NOSVE-NEXT: add w9, w22, w22 -; NONEON-NOSVE-NEXT: strb w8, [sp, #74] -; NONEON-NOSVE-NEXT: add w8, w16, w16 -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: strb w17, [sp, #63] -; NONEON-NOSVE-NEXT: add w17, w30, w30 -; NONEON-NOSVE-NEXT: strb w9, [sp, #51] -; NONEON-NOSVE-NEXT: add w9, w20, w20 -; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] -; NONEON-NOSVE-NEXT: strb w8, [sp, #72] -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] -; NONEON-NOSVE-NEXT: strb w18, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] -; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] -; NONEON-NOSVE-NEXT: strb w17, [sp, #61] -; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] -; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] -; NONEON-NOSVE-NEXT: strb w9, [sp, #49] -; NONEON-NOSVE-NEXT: add w9, w7, w7 -; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] -; NONEON-NOSVE-NEXT: strb w8, [sp, #70] -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: strb w8, [sp, #68] -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strb w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strb w9, [sp, #79] -; NONEON-NOSVE-NEXT: add w9, w5, w5 -; NONEON-NOSVE-NEXT: strb w9, [sp, #77] -; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w5, w14, w14 +; NONEON-NOSVE-NEXT: sxtb w6, w17 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17] +; NONEON-NOSVE-NEXT: sxtb w5, w5 ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strb w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95] -; NONEON-NOSVE-NEXT: strb w9, [sp, #75] -; NONEON-NOSVE-NEXT: add w9, w0, w0 -; NONEON-NOSVE-NEXT: strh w8, [sp, #142] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94] -; NONEON-NOSVE-NEXT: strb w9, [sp, #73] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: strh w8, [sp, #140] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93] -; NONEON-NOSVE-NEXT: strb w9, [sp, #71] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: strh w8, [sp, #138] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92] -; NONEON-NOSVE-NEXT: strb w9, [sp, #69] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: strh w8, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91] -; NONEON-NOSVE-NEXT: strb w9, [sp, #67] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #134] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #30] +; NONEON-NOSVE-NEXT: strh w6, [sp, #78] +; NONEON-NOSVE-NEXT: add w6, w30, w30 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #31] +; NONEON-NOSVE-NEXT: sxtb w6, w6 ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #132] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89] -; NONEON-NOSVE-NEXT: strb w9, [sp, #65] -; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #130] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: strh w8, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87] -; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #126] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #124] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85] -; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #122] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84] -; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83] -; NONEON-NOSVE-NEXT: strh w8, [sp, #118] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82] -; NONEON-NOSVE-NEXT: strh w8, [sp, #116] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81] -; NONEON-NOSVE-NEXT: strh w8, [sp, #114] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80] -; NONEON-NOSVE-NEXT: strh w8, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112] -; NONEON-NOSVE-NEXT: strh w8, [sp, #174] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110] -; NONEON-NOSVE-NEXT: strh w8, [sp, #172] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109] -; NONEON-NOSVE-NEXT: strh w8, [sp, #170] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108] -; NONEON-NOSVE-NEXT: strh w8, [sp, #168] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107] -; NONEON-NOSVE-NEXT: strh w8, [sp, #166] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106] -; NONEON-NOSVE-NEXT: strh w8, [sp, #164] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105] -; NONEON-NOSVE-NEXT: strh w8, [sp, #162] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104] -; NONEON-NOSVE-NEXT: strh w8, [sp, #160] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103] -; NONEON-NOSVE-NEXT: strh w8, [sp, #158] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102] -; NONEON-NOSVE-NEXT: strh w8, [sp, #156] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101] -; NONEON-NOSVE-NEXT: strh w8, [sp, #154] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100] -; NONEON-NOSVE-NEXT: strh w8, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99] -; NONEON-NOSVE-NEXT: strh w8, [sp, #150] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #148] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97] -; NONEON-NOSVE-NEXT: strh w8, [sp, #146] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96] -; NONEON-NOSVE-NEXT: strh w8, [sp, #144] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #144] +; NONEON-NOSVE-NEXT: strh w5, [sp, #72] +; NONEON-NOSVE-NEXT: add w5, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #29] +; NONEON-NOSVE-NEXT: sxtb w9, w9 +; NONEON-NOSVE-NEXT: sxtb w5, w5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #26] +; NONEON-NOSVE-NEXT: strh w6, [sp, #70] +; NONEON-NOSVE-NEXT: add w6, w28, w28 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #27] +; NONEON-NOSVE-NEXT: sxtb w6, w6 +; NONEON-NOSVE-NEXT: strh w9, [sp, #66] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strh w5, [sp, #64] +; NONEON-NOSVE-NEXT: add w5, w25, w25 +; NONEON-NOSVE-NEXT: sxtb w9, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: sxtb w8, w5 +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #25] +; NONEON-NOSVE-NEXT: strh w6, [sp, #62] +; NONEON-NOSVE-NEXT: add w6, w24, w24 +; NONEON-NOSVE-NEXT: add w5, w23, w23 +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: sxtb w9, w6 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: sxtb w8, w5 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #39] +; NONEON-NOSVE-NEXT: add w6, w22, w22 +; NONEON-NOSVE-NEXT: add w5, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #21] +; NONEON-NOSVE-NEXT: strh w9, [sp, #54] +; NONEON-NOSVE-NEXT: sxtb w9, w6 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: sxtb w8, w5 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #37] +; NONEON-NOSVE-NEXT: add w6, w20, w20 +; NONEON-NOSVE-NEXT: add w4, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: add w18, w15, w15 +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: sxtb w9, w6 +; NONEON-NOSVE-NEXT: sxtb w19, w18 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: sxtb w8, w4 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #35] +; NONEON-NOSVE-NEXT: add w3, w3, w3 +; NONEON-NOSVE-NEXT: add w2, w2, w2 +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: strh w9, [sp, #110] +; NONEON-NOSVE-NEXT: sxtb w9, w3 +; NONEON-NOSVE-NEXT: strh w8, [sp, #108] +; NONEON-NOSVE-NEXT: sxtb w8, w2 +; NONEON-NOSVE-NEXT: sxtb w7, w16 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #33] +; NONEON-NOSVE-NEXT: add w0, w0, w0 +; NONEON-NOSVE-NEXT: add w18, w18, w18 +; NONEON-NOSVE-NEXT: strh w9, [sp, #106] +; NONEON-NOSVE-NEXT: sxtb w9, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #104] +; NONEON-NOSVE-NEXT: sxtb w8, w18 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: strh w9, [sp, #102] +; NONEON-NOSVE-NEXT: sxtb w9, w17 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #100] +; NONEON-NOSVE-NEXT: sxtb w8, w16 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: strh w9, [sp, #98] +; NONEON-NOSVE-NEXT: sxtb w9, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #96] +; NONEON-NOSVE-NEXT: sxtb w8, w14 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: strh w9, [sp, #94] +; NONEON-NOSVE-NEXT: sxtb w9, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: sxtb w8, w12 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: strh w9, [sp, #90] +; NONEON-NOSVE-NEXT: sxtb w9, w11 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: sxtb w8, w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w9, [sp, #86] +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: sxtb w9, w11 +; NONEON-NOSVE-NEXT: strh w7, [sp, #76] +; NONEON-NOSVE-NEXT: sxtb w8, w10 +; NONEON-NOSVE-NEXT: strh w19, [sp, #74] +; NONEON-NOSVE-NEXT: strh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #272 +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #208 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -431,42 +377,24 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v8i8_v8i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] -; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] -; NONEON-NOSVE-NEXT: strh w8, [sp, #28] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] -; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] -; NONEON-NOSVE-NEXT: strh w8, [sp, #24] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15] -; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] -; NONEON-NOSVE-NEXT: strh w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] -; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] -; NONEON-NOSVE-NEXT: strh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #36] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %b = sext <8 x i8> %a to <8 x i32> store <8 x i32>%b, ptr %out @@ -492,75 +420,39 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v16i8_v16i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] -; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] -; NONEON-NOSVE-NEXT: strh w8, [sp, #60] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] -; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] -; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] -; NONEON-NOSVE-NEXT: strh w8, [sp, #52] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] -; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] -; NONEON-NOSVE-NEXT: strh w8, [sp, #44] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] -; NONEON-NOSVE-NEXT: strh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] -; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] -; NONEON-NOSVE-NEXT: strh w8, [sp, #36] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] -; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21] ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] -; NONEON-NOSVE-NEXT: strh w8, [sp, #32] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out @@ -599,14 +491,14 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v32i8_v32i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #464 -; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #368] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #384] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #400] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #416] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #432] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #448] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 464 +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 ; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 ; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 @@ -621,258 +513,136 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] -; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] -; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] -; NONEON-NOSVE-NEXT: strb w8, [sp, #60] -; NONEON-NOSVE-NEXT: add w8, w29, w29 -; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] -; NONEON-NOSVE-NEXT: strb w8, [sp, #58] -; NONEON-NOSVE-NEXT: add w8, w27, w27 -; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] -; NONEON-NOSVE-NEXT: strb w8, [sp, #56] -; NONEON-NOSVE-NEXT: add w8, w25, w25 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strb w8, [sp, #54] -; NONEON-NOSVE-NEXT: add w8, w23, w23 -; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] -; NONEON-NOSVE-NEXT: strb w8, [sp, #52] -; NONEON-NOSVE-NEXT: add w8, w21, w21 -; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] -; NONEON-NOSVE-NEXT: strb w8, [sp, #50] -; NONEON-NOSVE-NEXT: add w8, w19, w19 -; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] -; NONEON-NOSVE-NEXT: strb w9, [sp, #59] -; NONEON-NOSVE-NEXT: add w9, w28, w28 -; NONEON-NOSVE-NEXT: add w18, w16, w16 -; NONEON-NOSVE-NEXT: strb w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w8, w6, w6 -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] -; NONEON-NOSVE-NEXT: strb w9, [sp, #57] -; NONEON-NOSVE-NEXT: add w9, w26, w26 -; NONEON-NOSVE-NEXT: strb w8, [sp, #78] -; NONEON-NOSVE-NEXT: add w8, w4, w4 -; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] -; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] -; NONEON-NOSVE-NEXT: strb w9, [sp, #55] -; NONEON-NOSVE-NEXT: add w9, w24, w24 -; NONEON-NOSVE-NEXT: strb w8, [sp, #76] -; NONEON-NOSVE-NEXT: add w8, w2, w2 -; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #17] +; NONEON-NOSVE-NEXT: add w16, w16, w16 ; NONEON-NOSVE-NEXT: add w17, w17, w17 -; NONEON-NOSVE-NEXT: strb w9, [sp, #53] -; NONEON-NOSVE-NEXT: add w9, w22, w22 -; NONEON-NOSVE-NEXT: strb w8, [sp, #74] -; NONEON-NOSVE-NEXT: add w8, w16, w16 -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: strb w17, [sp, #63] -; NONEON-NOSVE-NEXT: add w17, w30, w30 -; NONEON-NOSVE-NEXT: strb w9, [sp, #51] -; NONEON-NOSVE-NEXT: add w9, w20, w20 -; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] -; NONEON-NOSVE-NEXT: strb w8, [sp, #72] -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] -; NONEON-NOSVE-NEXT: strb w18, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] -; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] -; NONEON-NOSVE-NEXT: strb w17, [sp, #61] -; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] -; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] -; NONEON-NOSVE-NEXT: strb w9, [sp, #49] -; NONEON-NOSVE-NEXT: add w9, w7, w7 -; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] -; NONEON-NOSVE-NEXT: strb w8, [sp, #70] -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: strb w8, [sp, #68] -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strb w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strb w9, [sp, #79] -; NONEON-NOSVE-NEXT: add w9, w5, w5 -; NONEON-NOSVE-NEXT: strb w9, [sp, #77] -; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #23] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sxtb w6, w17 +; NONEON-NOSVE-NEXT: sxtb w7, w16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w5, w14, w14 +; NONEON-NOSVE-NEXT: add w18, w15, w15 +; NONEON-NOSVE-NEXT: sxtb w19, w18 +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21] +; NONEON-NOSVE-NEXT: stp w7, w6, [sp, #104] +; NONEON-NOSVE-NEXT: add w6, w30, w30 +; NONEON-NOSVE-NEXT: sxtb w5, w5 ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strb w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91] -; NONEON-NOSVE-NEXT: strb w9, [sp, #75] -; NONEON-NOSVE-NEXT: add w9, w0, w0 -; NONEON-NOSVE-NEXT: strh w8, [sp, #134] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90] -; NONEON-NOSVE-NEXT: strb w9, [sp, #73] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: strh w8, [sp, #132] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89] -; NONEON-NOSVE-NEXT: strb w9, [sp, #71] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: strh w8, [sp, #130] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88] -; NONEON-NOSVE-NEXT: strb w9, [sp, #69] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: strh w8, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95] -; NONEON-NOSVE-NEXT: strb w9, [sp, #67] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #142] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #27] +; NONEON-NOSVE-NEXT: sxtb w6, w6 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: stp w5, w19, [sp, #96] ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #448] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #140] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93] -; NONEON-NOSVE-NEXT: strb w9, [sp, #65] -; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #432] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #138] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: strh w8, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83] -; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #416] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #118] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] -; NONEON-NOSVE-NEXT: strh w8, [sp, #116] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81] -; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #400] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #114] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] -; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #384] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87] -; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #368] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #126] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86] -; NONEON-NOSVE-NEXT: strh w8, [sp, #124] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85] -; NONEON-NOSVE-NEXT: strh w8, [sp, #122] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84] -; NONEON-NOSVE-NEXT: strh w8, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] -; NONEON-NOSVE-NEXT: strh w8, [sp, #166] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106] -; NONEON-NOSVE-NEXT: strh w8, [sp, #164] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] -; NONEON-NOSVE-NEXT: strh w8, [sp, #162] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #182] -; NONEON-NOSVE-NEXT: strh w8, [sp, #160] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111] -; NONEON-NOSVE-NEXT: strh w8, [sp, #174] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110] -; NONEON-NOSVE-NEXT: strh w8, [sp, #172] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109] -; NONEON-NOSVE-NEXT: strh w8, [sp, #170] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108] -; NONEON-NOSVE-NEXT: strh w8, [sp, #168] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] -; NONEON-NOSVE-NEXT: strh w8, [sp, #150] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #148] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] -; NONEON-NOSVE-NEXT: strh w8, [sp, #146] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96] -; NONEON-NOSVE-NEXT: strh w8, [sp, #144] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103] -; NONEON-NOSVE-NEXT: strh w8, [sp, #158] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102] -; NONEON-NOSVE-NEXT: strh w8, [sp, #156] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101] -; NONEON-NOSVE-NEXT: strh w8, [sp, #154] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100] -; NONEON-NOSVE-NEXT: strh w8, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #198] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] -; NONEON-NOSVE-NEXT: str w8, [sp, #284] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #196] -; NONEON-NOSVE-NEXT: str w8, [sp, #280] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #194] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #276] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #192] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #206] -; NONEON-NOSVE-NEXT: str w8, [sp, #300] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #204] -; NONEON-NOSVE-NEXT: str w8, [sp, #296] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #202] -; NONEON-NOSVE-NEXT: str w8, [sp, #292] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #200] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #180] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #272] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #178] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #176] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #190] -; NONEON-NOSVE-NEXT: str w8, [sp, #268] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #188] -; NONEON-NOSVE-NEXT: str w8, [sp, #264] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #186] -; NONEON-NOSVE-NEXT: str w8, [sp, #260] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #184] -; NONEON-NOSVE-NEXT: str w8, [sp, #256] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #230] -; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #240] -; NONEON-NOSVE-NEXT: str w8, [sp, #348] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #228] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #226] -; NONEON-NOSVE-NEXT: str w8, [sp, #340] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #224] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #238] -; NONEON-NOSVE-NEXT: str w8, [sp, #364] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #236] -; NONEON-NOSVE-NEXT: str w8, [sp, #360] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #234] -; NONEON-NOSVE-NEXT: str w8, [sp, #356] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #232] -; NONEON-NOSVE-NEXT: str w8, [sp, #352] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #214] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #336] -; NONEON-NOSVE-NEXT: str w8, [sp, #316] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #212] -; NONEON-NOSVE-NEXT: str w8, [sp, #312] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #210] -; NONEON-NOSVE-NEXT: str w8, [sp, #308] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #222] -; NONEON-NOSVE-NEXT: str w8, [sp, #332] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #220] -; NONEON-NOSVE-NEXT: str w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #218] -; NONEON-NOSVE-NEXT: str w8, [sp, #324] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #216] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] -; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #304] +; NONEON-NOSVE-NEXT: add w5, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #25] +; NONEON-NOSVE-NEXT: sxtb w9, w9 +; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #88] +; NONEON-NOSVE-NEXT: add w6, w28, w28 +; NONEON-NOSVE-NEXT: sxtb w5, w5 +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: sxtb w6, w6 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #31] +; NONEON-NOSVE-NEXT: stp w5, w9, [sp, #80] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: add w5, w25, w25 +; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #72] +; NONEON-NOSVE-NEXT: sxtb w9, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w5 +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #29] +; NONEON-NOSVE-NEXT: add w6, w24, w24 +; NONEON-NOSVE-NEXT: add w5, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #34] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: sxtb w9, w6 +; NONEON-NOSVE-NEXT: sxtb w8, w5 +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #35] +; NONEON-NOSVE-NEXT: add w6, w22, w22 +; NONEON-NOSVE-NEXT: add w5, w21, w21 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: sxtb w9, w6 +; NONEON-NOSVE-NEXT: sxtb w8, w5 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #33] +; NONEON-NOSVE-NEXT: add w6, w20, w20 +; NONEON-NOSVE-NEXT: add w4, w4, w4 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: sxtb w9, w6 +; NONEON-NOSVE-NEXT: sxtb w8, w4 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #39] +; NONEON-NOSVE-NEXT: add w3, w3, w3 +; NONEON-NOSVE-NEXT: add w2, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #36] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: sxtb w9, w3 +; NONEON-NOSVE-NEXT: sxtb w8, w2 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #37] +; NONEON-NOSVE-NEXT: add w0, w0, w0 +; NONEON-NOSVE-NEXT: add w18, w18, w18 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: sxtb w9, w0 +; NONEON-NOSVE-NEXT: sxtb w8, w18 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #43] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: sxtb w9, w17 +; NONEON-NOSVE-NEXT: sxtb w8, w16 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #41] +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: sxtb w9, w15 +; NONEON-NOSVE-NEXT: sxtb w8, w14 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #47] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: sxtb w9, w13 +; NONEON-NOSVE-NEXT: sxtb w8, w12 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: sxtb w9, w11 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sxtb w8, w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: sxtb w11, w11 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: sxtb w8, w10 +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #144] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #112] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #112] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #464 +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #272 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -944,57 +714,26 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v8i8_v8i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #176 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 176 +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: add x8, sp, #144 -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] -; NONEON-NOSVE-NEXT: strh w9, [sp, #30] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] -; NONEON-NOSVE-NEXT: strh w9, [sp, #28] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] -; NONEON-NOSVE-NEXT: strh w9, [sp, #26] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] -; NONEON-NOSVE-NEXT: strh w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] -; NONEON-NOSVE-NEXT: strh w9, [sp, #22] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] -; NONEON-NOSVE-NEXT: strh w9, [sp, #20] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] -; NONEON-NOSVE-NEXT: strh w9, [sp, #18] -; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] -; NONEON-NOSVE-NEXT: strh w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #42] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #64] -; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #46] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #72] -; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #34] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] -; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #38] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #80] -; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #96] -; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #144] -; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #104] -; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #160] -; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #80] -; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #112] -; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #88] -; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #128] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #112] -; NONEON-NOSVE-NEXT: ldp q2, q3, [x8] -; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #9] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #8] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #11] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #10] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #13] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #12] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #15] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #14] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q2, q3, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #176 +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #80 ; NONEON-NOSVE-NEXT: ret %b = sext <8 x i8> %a to <8 x i64> store <8 x i64>%b, ptr %out @@ -1034,109 +773,43 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v16i8_v16i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #368 -; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 -; NONEON-NOSVE-NEXT: str q0, [sp] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #24] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #35] -; NONEON-NOSVE-NEXT: strh w8, [sp, #70] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #34] -; NONEON-NOSVE-NEXT: strh w8, [sp, #68] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #33] -; NONEON-NOSVE-NEXT: strh w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #32] -; NONEON-NOSVE-NEXT: strh w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #39] -; NONEON-NOSVE-NEXT: strh w8, [sp, #62] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #38] -; NONEON-NOSVE-NEXT: strh w8, [sp, #60] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #37] -; NONEON-NOSVE-NEXT: strh w8, [sp, #58] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #36] -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: strh w8, [sp, #54] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] -; NONEON-NOSVE-NEXT: strh w8, [sp, #52] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: strh w8, [sp, #50] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] -; NONEON-NOSVE-NEXT: strh w8, [sp, #46] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] -; NONEON-NOSVE-NEXT: strh w8, [sp, #44] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] -; NONEON-NOSVE-NEXT: strh w8, [sp, #42] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] -; NONEON-NOSVE-NEXT: strh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #102] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #100] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] -; NONEON-NOSVE-NEXT: str d0, [sp, #360] -; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #192] -; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #104] -; NONEON-NOSVE-NEXT: str d2, [sp, #168] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #216] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #320] -; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #364] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #360] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #336] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #200] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #320] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #208] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #304] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #184] -; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #288] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #192] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #168] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #256] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #176] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240] -; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #224] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #24] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #26] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #28] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #30] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #17] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #96] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #18] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #21] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #20] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #22] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #32] ; NONEON-NOSVE-NEXT: stp q0, q1, [x0] ; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64] ; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: add sp, sp, #160 ; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out @@ -1208,14 +881,14 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v32i8_v32i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: sub sp, sp, #752 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 848 +; NONEON-NOSVE-NEXT: sub sp, sp, #400 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #320] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #336] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 400 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 ; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 ; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 @@ -1230,345 +903,144 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] -; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] -; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] -; NONEON-NOSVE-NEXT: strb w8, [sp, #60] -; NONEON-NOSVE-NEXT: add w8, w29, w29 -; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] -; NONEON-NOSVE-NEXT: strb w8, [sp, #58] -; NONEON-NOSVE-NEXT: add w8, w27, w27 -; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] -; NONEON-NOSVE-NEXT: strb w8, [sp, #56] -; NONEON-NOSVE-NEXT: add w8, w25, w25 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strb w8, [sp, #54] -; NONEON-NOSVE-NEXT: add w8, w23, w23 -; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] -; NONEON-NOSVE-NEXT: strb w8, [sp, #52] -; NONEON-NOSVE-NEXT: add w8, w21, w21 -; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] -; NONEON-NOSVE-NEXT: strb w8, [sp, #50] -; NONEON-NOSVE-NEXT: add w8, w19, w19 -; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] -; NONEON-NOSVE-NEXT: strb w9, [sp, #59] -; NONEON-NOSVE-NEXT: add w9, w28, w28 -; NONEON-NOSVE-NEXT: add w18, w16, w16 -; NONEON-NOSVE-NEXT: strb w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w8, w6, w6 -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] -; NONEON-NOSVE-NEXT: strb w9, [sp, #57] -; NONEON-NOSVE-NEXT: add w9, w26, w26 -; NONEON-NOSVE-NEXT: strb w8, [sp, #78] -; NONEON-NOSVE-NEXT: add w8, w4, w4 -; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] -; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] -; NONEON-NOSVE-NEXT: strb w9, [sp, #55] -; NONEON-NOSVE-NEXT: add w9, w24, w24 -; NONEON-NOSVE-NEXT: strb w8, [sp, #76] -; NONEON-NOSVE-NEXT: add w8, w2, w2 -; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #19] +; NONEON-NOSVE-NEXT: add w16, w16, w16 ; NONEON-NOSVE-NEXT: add w17, w17, w17 -; NONEON-NOSVE-NEXT: strb w9, [sp, #53] -; NONEON-NOSVE-NEXT: add w9, w22, w22 -; NONEON-NOSVE-NEXT: strb w8, [sp, #74] -; NONEON-NOSVE-NEXT: add w8, w16, w16 -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: strb w17, [sp, #63] -; NONEON-NOSVE-NEXT: add w17, w30, w30 -; NONEON-NOSVE-NEXT: strb w9, [sp, #51] -; NONEON-NOSVE-NEXT: add w9, w20, w20 -; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] -; NONEON-NOSVE-NEXT: strb w8, [sp, #72] -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] -; NONEON-NOSVE-NEXT: strb w18, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] -; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] -; NONEON-NOSVE-NEXT: strb w17, [sp, #61] -; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] -; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] -; NONEON-NOSVE-NEXT: strb w9, [sp, #49] -; NONEON-NOSVE-NEXT: add w9, w7, w7 -; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] -; NONEON-NOSVE-NEXT: strb w8, [sp, #70] -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: strb w8, [sp, #68] -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strb w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strb w9, [sp, #79] -; NONEON-NOSVE-NEXT: add w9, w5, w5 -; NONEON-NOSVE-NEXT: strb w9, [sp, #77] -; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sxtb x19, w17 +; NONEON-NOSVE-NEXT: sxtb x20, w16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w7, w14, w14 +; NONEON-NOSVE-NEXT: add w18, w15, w15 +; NONEON-NOSVE-NEXT: sxtb x21, w18 +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23] +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #160] +; NONEON-NOSVE-NEXT: add w19, w30, w30 +; NONEON-NOSVE-NEXT: sxtb x7, w7 ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strb w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91] -; NONEON-NOSVE-NEXT: strb w9, [sp, #75] -; NONEON-NOSVE-NEXT: add w9, w0, w0 -; NONEON-NOSVE-NEXT: strh w8, [sp, #134] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90] -; NONEON-NOSVE-NEXT: strb w9, [sp, #73] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: strh w8, [sp, #132] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89] -; NONEON-NOSVE-NEXT: strb w9, [sp, #71] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: strh w8, [sp, #130] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88] -; NONEON-NOSVE-NEXT: strb w9, [sp, #69] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: strh w8, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95] -; NONEON-NOSVE-NEXT: strb w9, [sp, #67] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #142] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #25] +; NONEON-NOSVE-NEXT: sxtb x19, w19 +; NONEON-NOSVE-NEXT: sxtb x8, w8 +; NONEON-NOSVE-NEXT: stp x7, x21, [sp, #144] ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strh w8, [sp, #140] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93] -; NONEON-NOSVE-NEXT: strb w9, [sp, #65] -; NONEON-NOSVE-NEXT: strh w8, [sp, #138] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: strh w8, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83] -; NONEON-NOSVE-NEXT: strh w8, [sp, #118] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] -; NONEON-NOSVE-NEXT: strh w8, [sp, #116] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81] -; NONEON-NOSVE-NEXT: strh w8, [sp, #114] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] -; NONEON-NOSVE-NEXT: strh w8, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87] -; NONEON-NOSVE-NEXT: strh w8, [sp, #126] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86] -; NONEON-NOSVE-NEXT: strh w8, [sp, #124] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85] -; NONEON-NOSVE-NEXT: strh w8, [sp, #122] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84] -; NONEON-NOSVE-NEXT: strh w8, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] -; NONEON-NOSVE-NEXT: strh w8, [sp, #166] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106] -; NONEON-NOSVE-NEXT: strh w8, [sp, #164] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] -; NONEON-NOSVE-NEXT: strh w8, [sp, #162] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #178] -; NONEON-NOSVE-NEXT: strh w8, [sp, #160] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111] -; NONEON-NOSVE-NEXT: strh w8, [sp, #174] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110] -; NONEON-NOSVE-NEXT: strh w8, [sp, #172] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109] -; NONEON-NOSVE-NEXT: strh w8, [sp, #170] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108] -; NONEON-NOSVE-NEXT: strh w8, [sp, #168] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] -; NONEON-NOSVE-NEXT: strh w8, [sp, #150] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #148] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] -; NONEON-NOSVE-NEXT: strh w8, [sp, #146] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96] -; NONEON-NOSVE-NEXT: strh w8, [sp, #144] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103] -; NONEON-NOSVE-NEXT: strh w8, [sp, #158] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102] -; NONEON-NOSVE-NEXT: strh w8, [sp, #156] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101] -; NONEON-NOSVE-NEXT: strh w8, [sp, #154] -; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100] -; NONEON-NOSVE-NEXT: strh w8, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #194] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] -; NONEON-NOSVE-NEXT: str w8, [sp, #276] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #192] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #198] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #284] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #196] -; NONEON-NOSVE-NEXT: str w8, [sp, #280] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #202] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #272] -; NONEON-NOSVE-NEXT: str w8, [sp, #292] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #200] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #206] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #400] -; NONEON-NOSVE-NEXT: str w8, [sp, #300] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #204] -; NONEON-NOSVE-NEXT: str w8, [sp, #296] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #176] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #288] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #182] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #180] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #186] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #416] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #240] -; NONEON-NOSVE-NEXT: str w8, [sp, #260] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #184] -; NONEON-NOSVE-NEXT: str w8, [sp, #256] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #190] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #368] -; NONEON-NOSVE-NEXT: str w8, [sp, #268] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #188] -; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #372] -; NONEON-NOSVE-NEXT: str w8, [sp, #264] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #226] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #256] -; NONEON-NOSVE-NEXT: str w8, [sp, #340] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #224] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #230] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #384] -; NONEON-NOSVE-NEXT: str w8, [sp, #348] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #228] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #234] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #336] -; NONEON-NOSVE-NEXT: str w8, [sp, #356] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #232] -; NONEON-NOSVE-NEXT: str w8, [sp, #352] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #238] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #464] -; NONEON-NOSVE-NEXT: str w8, [sp, #364] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #236] -; NONEON-NOSVE-NEXT: str w8, [sp, #360] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #210] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #352] -; NONEON-NOSVE-NEXT: str w8, [sp, #308] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #214] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #480] -; NONEON-NOSVE-NEXT: str w8, [sp, #316] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #212] -; NONEON-NOSVE-NEXT: str w8, [sp, #312] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #218] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #304] -; NONEON-NOSVE-NEXT: str w8, [sp, #324] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #216] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #222] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #432] -; NONEON-NOSVE-NEXT: str w8, [sp, #332] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #220] -; NONEON-NOSVE-NEXT: str w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #404] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #320] -; NONEON-NOSVE-NEXT: str x8, [sp, #568] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #400] -; NONEON-NOSVE-NEXT: str x8, [sp, #560] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #412] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #448] -; NONEON-NOSVE-NEXT: str x8, [sp, #584] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #408] -; NONEON-NOSVE-NEXT: str x8, [sp, #576] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #420] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #560] -; NONEON-NOSVE-NEXT: str x8, [sp, #600] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #416] -; NONEON-NOSVE-NEXT: str x8, [sp, #592] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #428] -; NONEON-NOSVE-NEXT: str x8, [sp, #616] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #424] -; NONEON-NOSVE-NEXT: str x8, [sp, #608] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #368] -; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #592] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #496] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #380] -; NONEON-NOSVE-NEXT: str x8, [sp, #520] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #376] -; NONEON-NOSVE-NEXT: str x8, [sp, #512] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #388] -; NONEON-NOSVE-NEXT: ldp q4, q5, [sp, #496] -; NONEON-NOSVE-NEXT: str x8, [sp, #536] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #384] -; NONEON-NOSVE-NEXT: str x8, [sp, #528] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #396] -; NONEON-NOSVE-NEXT: str x8, [sp, #552] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #392] -; NONEON-NOSVE-NEXT: str x8, [sp, #544] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #468] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #528] -; NONEON-NOSVE-NEXT: str x8, [sp, #696] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #464] -; NONEON-NOSVE-NEXT: str x8, [sp, #688] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #476] -; NONEON-NOSVE-NEXT: str x8, [sp, #712] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #472] -; NONEON-NOSVE-NEXT: str x8, [sp, #704] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #484] -; NONEON-NOSVE-NEXT: ldp q16, q17, [sp, #688] -; NONEON-NOSVE-NEXT: str x8, [sp, #728] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #480] -; NONEON-NOSVE-NEXT: str x8, [sp, #720] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #492] -; NONEON-NOSVE-NEXT: str x8, [sp, #744] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #488] -; NONEON-NOSVE-NEXT: str x8, [sp, #736] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #436] -; NONEON-NOSVE-NEXT: ldp q19, q20, [sp, #720] -; NONEON-NOSVE-NEXT: str x8, [sp, #632] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #432] -; NONEON-NOSVE-NEXT: str x8, [sp, #624] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #444] -; NONEON-NOSVE-NEXT: str x8, [sp, #648] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #440] -; NONEON-NOSVE-NEXT: str x8, [sp, #640] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #452] -; NONEON-NOSVE-NEXT: ldp q22, q23, [sp, #624] -; NONEON-NOSVE-NEXT: str x8, [sp, #664] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #448] -; NONEON-NOSVE-NEXT: str x8, [sp, #656] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #460] -; NONEON-NOSVE-NEXT: str x8, [sp, #680] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #456] -; NONEON-NOSVE-NEXT: str x8, [sp, #672] -; NONEON-NOSVE-NEXT: ldp q21, q18, [sp, #656] +; NONEON-NOSVE-NEXT: add w7, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #27] +; NONEON-NOSVE-NEXT: sxtb x9, w9 +; NONEON-NOSVE-NEXT: stp x8, x19, [sp, #128] +; NONEON-NOSVE-NEXT: add w19, w28, w28 +; NONEON-NOSVE-NEXT: sxtb x7, w7 +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: sxtb x19, w19 +; NONEON-NOSVE-NEXT: sxtb x8, w8 +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: stp x7, x9, [sp, #112] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: add w7, w25, w25 +; NONEON-NOSVE-NEXT: stp x8, x19, [sp, #96] +; NONEON-NOSVE-NEXT: sxtb x9, w9 +; NONEON-NOSVE-NEXT: sxtb x8, w7 +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #31] +; NONEON-NOSVE-NEXT: add w19, w24, w24 +; NONEON-NOSVE-NEXT: add w7, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: sxtb x9, w19 +; NONEON-NOSVE-NEXT: sxtb x8, w7 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #33] +; NONEON-NOSVE-NEXT: add w19, w22, w22 +; NONEON-NOSVE-NEXT: add w6, w6, w6 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: sxtb x9, w19 +; NONEON-NOSVE-NEXT: sxtb x8, w6 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: add w5, w5, w5 +; NONEON-NOSVE-NEXT: add w4, w4, w4 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: sxtb x9, w5 +; NONEON-NOSVE-NEXT: sxtb x8, w4 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #37] +; NONEON-NOSVE-NEXT: add w3, w3, w3 +; NONEON-NOSVE-NEXT: add w2, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #38] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288] +; NONEON-NOSVE-NEXT: sxtb x9, w3 +; NONEON-NOSVE-NEXT: sxtb x8, w2 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #39] +; NONEON-NOSVE-NEXT: add w0, w0, w0 +; NONEON-NOSVE-NEXT: add w18, w18, w18 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272] +; NONEON-NOSVE-NEXT: sxtb x9, w0 +; NONEON-NOSVE-NEXT: sxtb x8, w18 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #41] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256] +; NONEON-NOSVE-NEXT: sxtb x9, w17 +; NONEON-NOSVE-NEXT: sxtb x8, w16 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #43] +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #44] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240] +; NONEON-NOSVE-NEXT: sxtb x9, w15 +; NONEON-NOSVE-NEXT: sxtb x8, w14 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #45] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224] +; NONEON-NOSVE-NEXT: sxtb x9, w13 +; NONEON-NOSVE-NEXT: sxtb x8, w12 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #208] +; NONEON-NOSVE-NEXT: sxtb x9, w11 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sxtb x8, w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: sxtb x11, w11 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #192] +; NONEON-NOSVE-NEXT: sxtb x8, w10 +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112] +; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #80] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q17, q16, [sp, #272] +; NONEON-NOSVE-NEXT: ldp q18, q21, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q20, q19, [sp, #240] +; NONEON-NOSVE-NEXT: ldp q23, q22, [sp, #208] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #384] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #368] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #352] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #336] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #320] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192] ; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224] -; NONEON-NOSVE-NEXT: add sp, sp, #752 -; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #400 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -1636,91 +1108,70 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v16i16_v16i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] -; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] -; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] -; NONEON-NOSVE-NEXT: ldrh w5, [sp] -; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] -; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w3, [sp] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #14] ; NONEON-NOSVE-NEXT: add w13, w13, w13 -; NONEON-NOSVE-NEXT: add w14, w14, w14 -; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] -; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] -; NONEON-NOSVE-NEXT: strh w14, [sp, #46] -; NONEON-NOSVE-NEXT: add w14, w3, w3 -; NONEON-NOSVE-NEXT: strh w13, [sp, #44] -; NONEON-NOSVE-NEXT: add w13, w5, w5 +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: sxth w16, w16 +; NONEON-NOSVE-NEXT: sxth w13, w13 +; NONEON-NOSVE-NEXT: add w3, w3, w3 +; NONEON-NOSVE-NEXT: sxth w12, w12 +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #10] +; NONEON-NOSVE-NEXT: stp w13, w16, [sp, #56] +; NONEON-NOSVE-NEXT: sxth w13, w3 +; NONEON-NOSVE-NEXT: add w16, w5, w5 +; NONEON-NOSVE-NEXT: add w3, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #22] +; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #48] +; NONEON-NOSVE-NEXT: sxth w12, w16 +; NONEON-NOSVE-NEXT: sxth w13, w3 +; NONEON-NOSVE-NEXT: add w16, w2, w2 +; NONEON-NOSVE-NEXT: add w0, w0, w0 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w14, [sp, #42] -; NONEON-NOSVE-NEXT: add w14, w4, w4 +; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #40] +; NONEON-NOSVE-NEXT: sxth w12, w16 +; NONEON-NOSVE-NEXT: sxth w13, w0 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] -; NONEON-NOSVE-NEXT: strh w13, [sp, #40] -; NONEON-NOSVE-NEXT: add w13, w2, w2 -; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] -; NONEON-NOSVE-NEXT: strh w14, [sp, #38] -; NONEON-NOSVE-NEXT: add w14, w0, w0 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strh w13, [sp, #36] -; NONEON-NOSVE-NEXT: add w13, w18, w18 -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: strh w14, [sp, #34] ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] ; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] -; NONEON-NOSVE-NEXT: strh w13, [sp, #32] -; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #16] ; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] -; NONEON-NOSVE-NEXT: strh w9, [sp, #50] -; NONEON-NOSVE-NEXT: add w14, w17, w17 -; NONEON-NOSVE-NEXT: add w12, w12, w12 -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w13, w16, w16 +; NONEON-NOSVE-NEXT: add w16, w18, w18 +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #32] +; NONEON-NOSVE-NEXT: sxth w12, w16 +; NONEON-NOSVE-NEXT: sxth w13, w17 +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: add w14, w14, w14 ; NONEON-NOSVE-NEXT: add w11, w11, w11 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] ; NONEON-NOSVE-NEXT: add w10, w10, w10 -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] -; NONEON-NOSVE-NEXT: strh w14, [sp, #62] -; NONEON-NOSVE-NEXT: add w14, w15, w15 -; NONEON-NOSVE-NEXT: strh w13, [sp, #60] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] -; NONEON-NOSVE-NEXT: strh w14, [sp, #58] -; NONEON-NOSVE-NEXT: strh w12, [sp, #56] -; NONEON-NOSVE-NEXT: strh w11, [sp, #54] -; NONEON-NOSVE-NEXT: strh w10, [sp, #52] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #88] +; NONEON-NOSVE-NEXT: sxth w12, w15 +; NONEON-NOSVE-NEXT: sxth w13, w14 +; NONEON-NOSVE-NEXT: sxth w11, w11 +; NONEON-NOSVE-NEXT: sxth w10, w10 +; NONEON-NOSVE-NEXT: sxth w9, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: stp w13, w12, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32] +; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -1746,24 +1197,18 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v4i16_v4i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #40] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #32] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #8] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #12] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %b = sext <4 x i16> %a to <4 x i64> store <4 x i64>%b, ptr %out @@ -1789,39 +1234,27 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v8i16_v8i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #88] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #80] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #72] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #64] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #24] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #28] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh x9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh x8, [sp, #20] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %b = sext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out @@ -1860,124 +1293,75 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v16i16_v16i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #368 -; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q1, q0, [sp] -; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] -; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] -; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] -; NONEON-NOSVE-NEXT: ldrh w5, [sp] -; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] -; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w13, [sp] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #10] ; NONEON-NOSVE-NEXT: add w13, w13, w13 -; NONEON-NOSVE-NEXT: add w14, w14, w14 -; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] -; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] -; NONEON-NOSVE-NEXT: strh w14, [sp, #54] -; NONEON-NOSVE-NEXT: add w14, w3, w3 -; NONEON-NOSVE-NEXT: strh w13, [sp, #52] -; NONEON-NOSVE-NEXT: add w13, w5, w5 -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w14, [sp, #50] -; NONEON-NOSVE-NEXT: add w14, w4, w4 -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] -; NONEON-NOSVE-NEXT: strh w13, [sp, #48] -; NONEON-NOSVE-NEXT: add w13, w2, w2 -; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] -; NONEON-NOSVE-NEXT: strh w14, [sp, #46] -; NONEON-NOSVE-NEXT: add w14, w0, w0 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strh w13, [sp, #44] -; NONEON-NOSVE-NEXT: add w13, w18, w18 -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: strh w14, [sp, #42] -; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] -; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] -; NONEON-NOSVE-NEXT: strh w13, [sp, #40] -; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] -; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] -; NONEON-NOSVE-NEXT: strh w9, [sp, #58] -; NONEON-NOSVE-NEXT: add w14, w17, w17 +; NONEON-NOSVE-NEXT: add w16, w16, w16 ; NONEON-NOSVE-NEXT: add w12, w12, w12 -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: add w13, w16, w16 +; NONEON-NOSVE-NEXT: sxth x16, w16 +; NONEON-NOSVE-NEXT: sxth x13, w13 +; NONEON-NOSVE-NEXT: add w3, w3, w3 +; NONEON-NOSVE-NEXT: sxth x12, w12 +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #14] +; NONEON-NOSVE-NEXT: stp x13, x16, [sp, #80] +; NONEON-NOSVE-NEXT: sxth x13, w3 +; NONEON-NOSVE-NEXT: add w16, w5, w5 +; NONEON-NOSVE-NEXT: add w3, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #18] +; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #64] +; NONEON-NOSVE-NEXT: sxth x12, w16 +; NONEON-NOSVE-NEXT: sxth x13, w3 +; NONEON-NOSVE-NEXT: add w16, w2, w2 +; NONEON-NOSVE-NEXT: add w0, w0, w0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #48] +; NONEON-NOSVE-NEXT: sxth x12, w16 +; NONEON-NOSVE-NEXT: sxth x13, w0 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #22] +; NONEON-NOSVE-NEXT: add w16, w18, w18 +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #32] +; NONEON-NOSVE-NEXT: sxth x12, w16 +; NONEON-NOSVE-NEXT: sxth x13, w17 +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: add w14, w14, w14 ; NONEON-NOSVE-NEXT: add w11, w11, w11 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] ; NONEON-NOSVE-NEXT: add w10, w10, w10 -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] -; NONEON-NOSVE-NEXT: strh w14, [sp, #70] -; NONEON-NOSVE-NEXT: add w14, w15, w15 -; NONEON-NOSVE-NEXT: strh w13, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] -; NONEON-NOSVE-NEXT: strh w14, [sp, #66] -; NONEON-NOSVE-NEXT: strh w12, [sp, #64] -; NONEON-NOSVE-NEXT: strh w11, [sp, #62] -; NONEON-NOSVE-NEXT: strh w10, [sp, #60] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #98] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #96] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #102] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #100] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #184] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #104] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #168] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: str d0, [sp, #360] -; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] -; NONEON-NOSVE-NEXT: str d2, [sp, #200] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #184] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #192] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #168] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #176] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #216] -; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #224] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #320] -; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #364] -; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #360] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #336] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #200] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #320] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #208] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #304] -; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #288] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #144] +; NONEON-NOSVE-NEXT: sxth x12, w15 +; NONEON-NOSVE-NEXT: sxth x13, w14 +; NONEON-NOSVE-NEXT: sxth x11, w11 +; NONEON-NOSVE-NEXT: sxth x10, w10 +; NONEON-NOSVE-NEXT: sxth x9, w9 +; NONEON-NOSVE-NEXT: sxth x8, w8 +; NONEON-NOSVE-NEXT: stp x13, x12, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp x10, x11, [sp, #112] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #96] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: add sp, sp, #160 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -2037,43 +1421,38 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: sext_v8i32_v8i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: stp q1, q0, [sp] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] -; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] ; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16] -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #72] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #64] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #88] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] -; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #80] -; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: sxtw x8, w8 +; NONEON-NOSVE-NEXT: sxtw x9, w9 +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: sxtw x8, w8 +; NONEON-NOSVE-NEXT: sxtw x9, w9 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: sxtw x13, w13 +; NONEON-NOSVE-NEXT: sxtw x12, w12 +; NONEON-NOSVE-NEXT: sxtw x11, w11 +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80] +; NONEON-NOSVE-NEXT: sxtw x8, w10 +; NONEON-NOSVE-NEXT: stp x12, x13, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a @@ -2162,14 +1541,14 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v32i8_v32i16: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #272 -; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 +; NONEON-NOSVE-NEXT: sub sp, sp, #208 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 ; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 ; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 @@ -2186,182 +1565,146 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] -; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #31] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] -; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18] +; NONEON-NOSVE-NEXT: add w5, w17, w17 +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #30] ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] -; NONEON-NOSVE-NEXT: strb w8, [sp, #60] -; NONEON-NOSVE-NEXT: add w8, w29, w29 -; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] -; NONEON-NOSVE-NEXT: strb w8, [sp, #58] -; NONEON-NOSVE-NEXT: add w8, w27, w27 -; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] -; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: and w5, w5, #0xff +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: strh w5, [sp, #74] +; NONEON-NOSVE-NEXT: add w5, w29, w29 +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #28] +; NONEON-NOSVE-NEXT: and w5, w5, #0xff +; NONEON-NOSVE-NEXT: strh w9, [sp, #68] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #27] +; NONEON-NOSVE-NEXT: strh w5, [sp, #66] +; NONEON-NOSVE-NEXT: add w5, w26, w26 +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] ; NONEON-NOSVE-NEXT: add w8, w25, w25 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strb w8, [sp, #54] -; NONEON-NOSVE-NEXT: add w8, w23, w23 -; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] -; NONEON-NOSVE-NEXT: strb w8, [sp, #52] -; NONEON-NOSVE-NEXT: add w8, w21, w21 -; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] -; NONEON-NOSVE-NEXT: strb w8, [sp, #50] -; NONEON-NOSVE-NEXT: add w8, w19, w19 -; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] -; NONEON-NOSVE-NEXT: strb w9, [sp, #59] -; NONEON-NOSVE-NEXT: add w9, w28, w28 -; NONEON-NOSVE-NEXT: add w18, w16, w16 -; NONEON-NOSVE-NEXT: strb w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w8, w6, w6 -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] -; NONEON-NOSVE-NEXT: strb w9, [sp, #57] -; NONEON-NOSVE-NEXT: add w9, w26, w26 -; NONEON-NOSVE-NEXT: strb w8, [sp, #78] -; NONEON-NOSVE-NEXT: add w8, w4, w4 -; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] -; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] -; NONEON-NOSVE-NEXT: strb w9, [sp, #55] -; NONEON-NOSVE-NEXT: add w9, w24, w24 -; NONEON-NOSVE-NEXT: strb w8, [sp, #76] -; NONEON-NOSVE-NEXT: add w8, w2, w2 -; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] -; NONEON-NOSVE-NEXT: add w17, w17, w17 -; NONEON-NOSVE-NEXT: strb w9, [sp, #53] -; NONEON-NOSVE-NEXT: add w9, w22, w22 -; NONEON-NOSVE-NEXT: strb w8, [sp, #74] -; NONEON-NOSVE-NEXT: add w8, w16, w16 -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: strb w17, [sp, #63] -; NONEON-NOSVE-NEXT: add w17, w30, w30 -; NONEON-NOSVE-NEXT: strb w9, [sp, #51] -; NONEON-NOSVE-NEXT: add w9, w20, w20 -; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] -; NONEON-NOSVE-NEXT: strb w8, [sp, #72] -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] -; NONEON-NOSVE-NEXT: strb w18, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] -; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] -; NONEON-NOSVE-NEXT: strb w17, [sp, #61] -; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] -; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] -; NONEON-NOSVE-NEXT: strb w9, [sp, #49] -; NONEON-NOSVE-NEXT: add w9, w7, w7 -; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] -; NONEON-NOSVE-NEXT: strb w8, [sp, #70] -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: strb w8, [sp, #68] -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strb w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strb w9, [sp, #79] -; NONEON-NOSVE-NEXT: add w9, w5, w5 -; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #26] +; NONEON-NOSVE-NEXT: strh w9, [sp, #62] +; NONEON-NOSVE-NEXT: and w9, w5, #0xff +; NONEON-NOSVE-NEXT: add w5, w24, w24 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #25] +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: add w9, w23, w23 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: and w8, w5, #0xff +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #24] +; NONEON-NOSVE-NEXT: add w5, w22, w22 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #39] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: and w8, w5, #0xff +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #38] +; NONEON-NOSVE-NEXT: add w5, w20, w20 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #37] +; NONEON-NOSVE-NEXT: add w9, w19, w19 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: and w8, w5, #0xff +; NONEON-NOSVE-NEXT: add w0, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #36] +; NONEON-NOSVE-NEXT: add w4, w4, w4 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: add w18, w15, w15 +; NONEON-NOSVE-NEXT: and w6, w0, #0xff +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #35] ; NONEON-NOSVE-NEXT: add w9, w3, w3 -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strb w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95] -; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: strh w8, [sp, #110] +; NONEON-NOSVE-NEXT: and w8, w4, #0xff +; NONEON-NOSVE-NEXT: and w7, w18, #0xff +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #34] +; NONEON-NOSVE-NEXT: add w2, w2, w2 +; NONEON-NOSVE-NEXT: strh w8, [sp, #108] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #33] ; NONEON-NOSVE-NEXT: add w9, w0, w0 -; NONEON-NOSVE-NEXT: strh w8, [sp, #142] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94] -; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: strh w8, [sp, #106] +; NONEON-NOSVE-NEXT: and w8, w2, #0xff +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: add w18, w18, w18 +; NONEON-NOSVE-NEXT: strh w8, [sp, #104] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: add w9, w17, w17 +; NONEON-NOSVE-NEXT: strh w8, [sp, #102] +; NONEON-NOSVE-NEXT: and w8, w18, #0xff +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #100] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] ; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: strh w8, [sp, #140] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93] -; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: strh w8, [sp, #98] +; NONEON-NOSVE-NEXT: and w8, w16, #0xff +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: strh w8, [sp, #96] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] ; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: strh w8, [sp, #138] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] -; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: and w8, w14, #0xff +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff ; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: strh w8, [sp, #136] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91] -; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: and w8, w12, #0xff +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff ; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #134] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: and w8, w10, #0xff +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w6, [sp, #78] ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #132] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89] -; NONEON-NOSVE-NEXT: strb w9, [sp, #65] -; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #130] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: strh w8, [sp, #128] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87] -; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #126] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #124] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85] -; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #122] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] -; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #120] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83] -; NONEON-NOSVE-NEXT: strh w8, [sp, #118] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82] -; NONEON-NOSVE-NEXT: strh w8, [sp, #116] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81] -; NONEON-NOSVE-NEXT: strh w8, [sp, #114] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] -; NONEON-NOSVE-NEXT: strh w8, [sp, #112] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112] -; NONEON-NOSVE-NEXT: strh w8, [sp, #174] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110] -; NONEON-NOSVE-NEXT: strh w8, [sp, #172] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109] -; NONEON-NOSVE-NEXT: strh w8, [sp, #170] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108] -; NONEON-NOSVE-NEXT: strh w8, [sp, #168] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107] -; NONEON-NOSVE-NEXT: strh w8, [sp, #166] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106] -; NONEON-NOSVE-NEXT: strh w8, [sp, #164] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105] -; NONEON-NOSVE-NEXT: strh w8, [sp, #162] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104] -; NONEON-NOSVE-NEXT: strh w8, [sp, #160] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103] -; NONEON-NOSVE-NEXT: strh w8, [sp, #158] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102] -; NONEON-NOSVE-NEXT: strh w8, [sp, #156] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101] -; NONEON-NOSVE-NEXT: strh w8, [sp, #154] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] -; NONEON-NOSVE-NEXT: strh w8, [sp, #152] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99] -; NONEON-NOSVE-NEXT: strh w8, [sp, #150] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #148] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97] -; NONEON-NOSVE-NEXT: strh w8, [sp, #146] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] -; NONEON-NOSVE-NEXT: strh w8, [sp, #144] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #144] +; NONEON-NOSVE-NEXT: add w6, w30, w30 +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: and w6, w6, #0xff +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: and w8, w10, #0xff +; NONEON-NOSVE-NEXT: strh w7, [sp, #76] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w6, [sp, #72] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #272 +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #208 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -2387,42 +1730,24 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v8i8_v8i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] -; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] -; NONEON-NOSVE-NEXT: strh w8, [sp, #28] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] -; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] -; NONEON-NOSVE-NEXT: strh w8, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] -; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] -; NONEON-NOSVE-NEXT: strh w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] -; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] -; NONEON-NOSVE-NEXT: strh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %b = zext <8 x i8> %a to <8 x i32> store <8 x i32>%b, ptr %out @@ -2448,75 +1773,39 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v16i8_v16i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] -; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #27] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] -; NONEON-NOSVE-NEXT: strh w8, [sp, #60] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] -; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #25] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] -; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #31] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] -; NONEON-NOSVE-NEXT: strh w8, [sp, #52] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] -; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #29] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] -; NONEON-NOSVE-NEXT: strh w8, [sp, #44] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #94] -; NONEON-NOSVE-NEXT: strh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] -; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] -; NONEON-NOSVE-NEXT: strh w8, [sp, #36] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] -; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: strh w8, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #76] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out @@ -2555,14 +1844,14 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v32i8_v32i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #464 -; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #368] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #384] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #400] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #416] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #432] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #448] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 464 +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 ; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 ; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 @@ -2577,258 +1866,136 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] -; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #16] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] -; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w18, w15, w15 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22] +; NONEON-NOSVE-NEXT: add w0, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: and w19, w18, #0xff +; NONEON-NOSVE-NEXT: and w7, w0, #0xff +; NONEON-NOSVE-NEXT: add w6, w17, w17 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #21] +; NONEON-NOSVE-NEXT: stp w19, w7, [sp, #104] +; NONEON-NOSVE-NEXT: add w7, w30, w30 ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] -; NONEON-NOSVE-NEXT: strb w8, [sp, #60] -; NONEON-NOSVE-NEXT: add w8, w29, w29 -; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] -; NONEON-NOSVE-NEXT: strb w8, [sp, #58] -; NONEON-NOSVE-NEXT: add w8, w27, w27 -; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] -; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: and w6, w6, #0xff +; NONEON-NOSVE-NEXT: and w7, w7, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #27] +; NONEON-NOSVE-NEXT: stp w7, w6, [sp, #96] +; NONEON-NOSVE-NEXT: add w6, w29, w29 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #88] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: and w6, w6, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #25] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: stp w8, w6, [sp, #80] +; NONEON-NOSVE-NEXT: add w6, w26, w26 +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #30] +; NONEON-NOSVE-NEXT: and w6, w6, #0xff +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #31] ; NONEON-NOSVE-NEXT: add w8, w25, w25 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strb w8, [sp, #54] -; NONEON-NOSVE-NEXT: add w8, w23, w23 -; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] -; NONEON-NOSVE-NEXT: strb w8, [sp, #52] -; NONEON-NOSVE-NEXT: add w8, w21, w21 -; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] -; NONEON-NOSVE-NEXT: strb w8, [sp, #50] -; NONEON-NOSVE-NEXT: add w8, w19, w19 -; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] -; NONEON-NOSVE-NEXT: strb w9, [sp, #59] -; NONEON-NOSVE-NEXT: add w9, w28, w28 -; NONEON-NOSVE-NEXT: add w18, w16, w16 -; NONEON-NOSVE-NEXT: strb w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w8, w6, w6 -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] -; NONEON-NOSVE-NEXT: strb w9, [sp, #57] -; NONEON-NOSVE-NEXT: add w9, w26, w26 -; NONEON-NOSVE-NEXT: strb w8, [sp, #78] -; NONEON-NOSVE-NEXT: add w8, w4, w4 -; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] -; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] -; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: stp w6, w9, [sp, #72] ; NONEON-NOSVE-NEXT: add w9, w24, w24 -; NONEON-NOSVE-NEXT: strb w8, [sp, #76] -; NONEON-NOSVE-NEXT: add w8, w2, w2 -; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] -; NONEON-NOSVE-NEXT: add w17, w17, w17 -; NONEON-NOSVE-NEXT: strb w9, [sp, #53] -; NONEON-NOSVE-NEXT: add w9, w22, w22 -; NONEON-NOSVE-NEXT: strb w8, [sp, #74] -; NONEON-NOSVE-NEXT: add w8, w16, w16 -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: strb w17, [sp, #63] -; NONEON-NOSVE-NEXT: add w17, w30, w30 -; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #29] +; NONEON-NOSVE-NEXT: add w6, w23, w23 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #64] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: and w9, w6, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #35] +; NONEON-NOSVE-NEXT: add w6, w21, w21 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] ; NONEON-NOSVE-NEXT: add w9, w20, w20 -; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] -; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: and w8, w6, #0xff +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #32] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #33] +; NONEON-NOSVE-NEXT: add w5, w5, w5 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: and w9, w5, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #39] +; NONEON-NOSVE-NEXT: add w3, w3, w3 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: and w8, w3, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #37] +; NONEON-NOSVE-NEXT: add w0, w0, w0 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #160] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: and w9, w0, #0xff +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #42] +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #43] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: and w8, w17, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #41] +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #144] ; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] -; NONEON-NOSVE-NEXT: strb w18, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] -; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] -; NONEON-NOSVE-NEXT: strb w17, [sp, #61] -; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] -; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] -; NONEON-NOSVE-NEXT: strb w9, [sp, #49] -; NONEON-NOSVE-NEXT: add w9, w7, w7 -; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] -; NONEON-NOSVE-NEXT: strb w8, [sp, #70] -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: and w9, w15, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #47] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: and w8, w13, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #128] ; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strb w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strb w9, [sp, #79] -; NONEON-NOSVE-NEXT: add w9, w5, w5 -; NONEON-NOSVE-NEXT: strb w9, [sp, #77] -; NONEON-NOSVE-NEXT: add w9, w3, w3 -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strb w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91] -; NONEON-NOSVE-NEXT: strb w9, [sp, #75] -; NONEON-NOSVE-NEXT: add w9, w0, w0 -; NONEON-NOSVE-NEXT: strh w8, [sp, #134] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90] -; NONEON-NOSVE-NEXT: strb w9, [sp, #73] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: strh w8, [sp, #132] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89] -; NONEON-NOSVE-NEXT: strb w9, [sp, #71] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: strh w8, [sp, #130] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] -; NONEON-NOSVE-NEXT: strb w9, [sp, #69] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: strh w8, [sp, #128] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95] -; NONEON-NOSVE-NEXT: strb w9, [sp, #67] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #142] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94] +; NONEON-NOSVE-NEXT: and w9, w11, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldr w10, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80] ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #448] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #140] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93] -; NONEON-NOSVE-NEXT: strb w9, [sp, #65] -; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #432] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #138] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: strh w8, [sp, #136] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83] -; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #416] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #118] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] -; NONEON-NOSVE-NEXT: strh w8, [sp, #116] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81] -; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #400] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #114] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] -; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #384] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #112] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87] -; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #368] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #126] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86] -; NONEON-NOSVE-NEXT: strh w8, [sp, #124] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85] -; NONEON-NOSVE-NEXT: strh w8, [sp, #122] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] -; NONEON-NOSVE-NEXT: strh w8, [sp, #120] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] -; NONEON-NOSVE-NEXT: strh w8, [sp, #166] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106] -; NONEON-NOSVE-NEXT: strh w8, [sp, #164] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] -; NONEON-NOSVE-NEXT: strh w8, [sp, #162] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182] -; NONEON-NOSVE-NEXT: strh w8, [sp, #160] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111] -; NONEON-NOSVE-NEXT: strh w8, [sp, #174] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110] -; NONEON-NOSVE-NEXT: strh w8, [sp, #172] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109] -; NONEON-NOSVE-NEXT: strh w8, [sp, #170] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108] -; NONEON-NOSVE-NEXT: strh w8, [sp, #168] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] -; NONEON-NOSVE-NEXT: strh w8, [sp, #150] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #148] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] -; NONEON-NOSVE-NEXT: strh w8, [sp, #146] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] -; NONEON-NOSVE-NEXT: strh w8, [sp, #144] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103] -; NONEON-NOSVE-NEXT: strh w8, [sp, #158] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102] -; NONEON-NOSVE-NEXT: strh w8, [sp, #156] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101] -; NONEON-NOSVE-NEXT: strh w8, [sp, #154] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] -; NONEON-NOSVE-NEXT: strh w8, [sp, #152] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #198] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] -; NONEON-NOSVE-NEXT: str w8, [sp, #284] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #196] -; NONEON-NOSVE-NEXT: str w8, [sp, #280] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #194] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #276] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #192] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #206] -; NONEON-NOSVE-NEXT: str w8, [sp, #300] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #204] -; NONEON-NOSVE-NEXT: str w8, [sp, #296] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #202] -; NONEON-NOSVE-NEXT: str w8, [sp, #292] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #200] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #180] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #272] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #176] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #190] -; NONEON-NOSVE-NEXT: str w8, [sp, #268] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #188] -; NONEON-NOSVE-NEXT: str w8, [sp, #264] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #186] -; NONEON-NOSVE-NEXT: str w8, [sp, #260] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #184] -; NONEON-NOSVE-NEXT: str w8, [sp, #256] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #230] -; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #240] -; NONEON-NOSVE-NEXT: str w8, [sp, #348] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #228] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #226] -; NONEON-NOSVE-NEXT: str w8, [sp, #340] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #224] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #238] -; NONEON-NOSVE-NEXT: str w8, [sp, #364] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #236] -; NONEON-NOSVE-NEXT: str w8, [sp, #360] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #234] -; NONEON-NOSVE-NEXT: str w8, [sp, #356] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #232] -; NONEON-NOSVE-NEXT: str w8, [sp, #352] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #214] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #336] -; NONEON-NOSVE-NEXT: str w8, [sp, #316] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #212] -; NONEON-NOSVE-NEXT: str w8, [sp, #312] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #210] -; NONEON-NOSVE-NEXT: str w8, [sp, #308] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #222] -; NONEON-NOSVE-NEXT: str w8, [sp, #332] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #220] -; NONEON-NOSVE-NEXT: str w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #218] -; NONEON-NOSVE-NEXT: str w8, [sp, #324] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #216] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] -; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #304] +; NONEON-NOSVE-NEXT: and w8, w10, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #48] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #112] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #464 +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #272 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -2858,26 +2025,20 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v4i8_v4i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %b = zext <4 x i8> %a to <4 x i64> store <4 x i64>%b, ptr %out @@ -2904,61 +2065,30 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v8i8_v8i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #176 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 176 +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: add x8, sp, #144 -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] -; NONEON-NOSVE-NEXT: strh w9, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] -; NONEON-NOSVE-NEXT: strh w9, [sp, #28] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] -; NONEON-NOSVE-NEXT: strh w9, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] -; NONEON-NOSVE-NEXT: strh w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] -; NONEON-NOSVE-NEXT: strh w9, [sp, #22] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] -; NONEON-NOSVE-NEXT: strh w9, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] -; NONEON-NOSVE-NEXT: strh w9, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] -; NONEON-NOSVE-NEXT: strh w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #64] -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #72] -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #34] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #38] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #96] -; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] -; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48] -; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #152] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #144] -; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #104] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #80] -; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #168] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #160] -; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #80] -; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #120] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #112] -; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #88] -; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #136] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #128] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #112] -; NONEON-NOSVE-NEXT: ldp q2, q3, [x8] -; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q2, q3, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #176 +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #80 ; NONEON-NOSVE-NEXT: ret %b = zext <8 x i8> %a to <8 x i64> store <8 x i64>%b, ptr %out @@ -2998,129 +2128,51 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v16i8_v16i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #368 -; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 -; NONEON-NOSVE-NEXT: str q0, [sp] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] -; NONEON-NOSVE-NEXT: str wzr, [sp, #332] -; NONEON-NOSVE-NEXT: str wzr, [sp, #324] -; NONEON-NOSVE-NEXT: str wzr, [sp, #348] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] -; NONEON-NOSVE-NEXT: str wzr, [sp, #340] -; NONEON-NOSVE-NEXT: str wzr, [sp, #300] -; NONEON-NOSVE-NEXT: strh w8, [sp, #70] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] -; NONEON-NOSVE-NEXT: str wzr, [sp, #292] -; NONEON-NOSVE-NEXT: strh w8, [sp, #68] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] -; NONEON-NOSVE-NEXT: str wzr, [sp, #316] -; NONEON-NOSVE-NEXT: strh w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] -; NONEON-NOSVE-NEXT: str wzr, [sp, #308] -; NONEON-NOSVE-NEXT: strh w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] -; NONEON-NOSVE-NEXT: str wzr, [sp, #268] -; NONEON-NOSVE-NEXT: strh w8, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] -; NONEON-NOSVE-NEXT: str wzr, [sp, #260] -; NONEON-NOSVE-NEXT: strh w8, [sp, #60] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] -; NONEON-NOSVE-NEXT: str wzr, [sp, #284] -; NONEON-NOSVE-NEXT: strh w8, [sp, #58] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] -; NONEON-NOSVE-NEXT: str wzr, [sp, #276] -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: strh w8, [sp, #54] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] -; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #152] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #98] -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] -; NONEON-NOSVE-NEXT: strh w8, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] -; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #136] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] -; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #120] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] -; NONEON-NOSVE-NEXT: strh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #102] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #94] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] -; NONEON-NOSVE-NEXT: str d0, [sp, #360] -; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #72] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #76] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216] -; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #192] -; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #104] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] -; NONEON-NOSVE-NEXT: str w9, [sp, #328] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200] -; NONEON-NOSVE-NEXT: str d2, [sp, #168] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #320] -; NONEON-NOSVE-NEXT: str w9, [sp, #296] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208] -; NONEON-NOSVE-NEXT: str w9, [sp, #312] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] -; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #288] -; NONEON-NOSVE-NEXT: str w9, [sp, #264] -; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #252] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #192] -; NONEON-NOSVE-NEXT: str w9, [sp, #280] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #256] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] -; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244] -; NONEON-NOSVE-NEXT: str w8, [sp, #240] -; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #224] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #104] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #32] ; NONEON-NOSVE-NEXT: stp q0, q1, [x0] ; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64] ; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: add sp, sp, #160 ; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out @@ -3192,14 +2244,14 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v32i8_v32i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill -; NONEON-NOSVE-NEXT: sub sp, sp, #752 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 848 +; NONEON-NOSVE-NEXT: sub sp, sp, #400 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #320] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #336] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 400 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 ; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 ; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 @@ -3213,379 +2265,168 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: str wzr, [sp, #572] -; NONEON-NOSVE-NEXT: str wzr, [sp, #564] -; NONEON-NOSVE-NEXT: str wzr, [sp, #588] +; NONEON-NOSVE-NEXT: str wzr, [sp, #172] +; NONEON-NOSVE-NEXT: str wzr, [sp, #292] +; NONEON-NOSVE-NEXT: str wzr, [sp, #300] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] -; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] -; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] -; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] -; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] -; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] -; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strb w8, [sp, #60] -; NONEON-NOSVE-NEXT: add w8, w29, w29 -; NONEON-NOSVE-NEXT: strb w8, [sp, #58] -; NONEON-NOSVE-NEXT: add w8, w27, w27 -; NONEON-NOSVE-NEXT: add w18, w16, w16 -; NONEON-NOSVE-NEXT: strb w8, [sp, #56] -; NONEON-NOSVE-NEXT: add w8, w25, w25 -; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] -; NONEON-NOSVE-NEXT: strb w8, [sp, #54] -; NONEON-NOSVE-NEXT: add w8, w23, w23 -; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] -; NONEON-NOSVE-NEXT: strb w8, [sp, #52] -; NONEON-NOSVE-NEXT: add w8, w21, w21 -; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] -; NONEON-NOSVE-NEXT: strb w8, [sp, #50] -; NONEON-NOSVE-NEXT: add w8, w19, w19 -; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] -; NONEON-NOSVE-NEXT: strb w9, [sp, #59] -; NONEON-NOSVE-NEXT: add w9, w28, w28 -; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] -; NONEON-NOSVE-NEXT: strb w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w8, w6, w6 -; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] -; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] -; NONEON-NOSVE-NEXT: strb w9, [sp, #57] -; NONEON-NOSVE-NEXT: add w9, w26, w26 -; NONEON-NOSVE-NEXT: strb w8, [sp, #78] -; NONEON-NOSVE-NEXT: add w8, w4, w4 -; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] -; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] ; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] -; NONEON-NOSVE-NEXT: add w17, w17, w17 -; NONEON-NOSVE-NEXT: strb w9, [sp, #55] -; NONEON-NOSVE-NEXT: add w9, w24, w24 -; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] -; NONEON-NOSVE-NEXT: strb w8, [sp, #76] -; NONEON-NOSVE-NEXT: add w8, w2, w2 -; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] -; NONEON-NOSVE-NEXT: strb w9, [sp, #53] -; NONEON-NOSVE-NEXT: add w9, w22, w22 -; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] -; NONEON-NOSVE-NEXT: strb w8, [sp, #74] -; NONEON-NOSVE-NEXT: add w8, w16, w16 -; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] -; NONEON-NOSVE-NEXT: strb w17, [sp, #63] -; NONEON-NOSVE-NEXT: add w17, w30, w30 -; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] -; NONEON-NOSVE-NEXT: strb w9, [sp, #51] -; NONEON-NOSVE-NEXT: add w9, w20, w20 -; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] -; NONEON-NOSVE-NEXT: strb w8, [sp, #72] -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] -; NONEON-NOSVE-NEXT: strb w18, [sp, #62] -; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] -; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] -; NONEON-NOSVE-NEXT: strb w17, [sp, #61] -; NONEON-NOSVE-NEXT: strb w9, [sp, #49] -; NONEON-NOSVE-NEXT: add w9, w7, w7 -; NONEON-NOSVE-NEXT: strb w8, [sp, #70] -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: strb w8, [sp, #68] -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: strb w8, [sp, #66] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strb w9, [sp, #79] -; NONEON-NOSVE-NEXT: add w9, w5, w5 -; NONEON-NOSVE-NEXT: strb w9, [sp, #77] -; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #23] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #26] ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: strb w8, [sp, #64] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91] -; NONEON-NOSVE-NEXT: strb w9, [sp, #75] -; NONEON-NOSVE-NEXT: add w9, w0, w0 -; NONEON-NOSVE-NEXT: strh w8, [sp, #134] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90] -; NONEON-NOSVE-NEXT: strb w9, [sp, #73] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: strh w8, [sp, #132] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89] -; NONEON-NOSVE-NEXT: strb w9, [sp, #71] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: strh w8, [sp, #130] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] -; NONEON-NOSVE-NEXT: strb w9, [sp, #69] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: strh w8, [sp, #128] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95] -; NONEON-NOSVE-NEXT: strb w9, [sp, #67] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: strh w8, [sp, #142] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94] ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: str wzr, [sp, #580] -; NONEON-NOSVE-NEXT: strh w8, [sp, #140] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93] -; NONEON-NOSVE-NEXT: strb w9, [sp, #65] -; NONEON-NOSVE-NEXT: strh w8, [sp, #138] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: str wzr, [sp, #604] -; NONEON-NOSVE-NEXT: strh w8, [sp, #136] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83] -; NONEON-NOSVE-NEXT: str wzr, [sp, #596] -; NONEON-NOSVE-NEXT: strh w8, [sp, #118] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] -; NONEON-NOSVE-NEXT: strh w8, [sp, #116] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81] -; NONEON-NOSVE-NEXT: str wzr, [sp, #620] -; NONEON-NOSVE-NEXT: strh w8, [sp, #114] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] -; NONEON-NOSVE-NEXT: strh w8, [sp, #112] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87] -; NONEON-NOSVE-NEXT: str wzr, [sp, #612] -; NONEON-NOSVE-NEXT: strh w8, [sp, #126] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86] -; NONEON-NOSVE-NEXT: str wzr, [sp, #508] -; NONEON-NOSVE-NEXT: strh w8, [sp, #124] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85] -; NONEON-NOSVE-NEXT: str wzr, [sp, #500] -; NONEON-NOSVE-NEXT: strh w8, [sp, #122] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] -; NONEON-NOSVE-NEXT: str wzr, [sp, #524] -; NONEON-NOSVE-NEXT: strh w8, [sp, #120] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] -; NONEON-NOSVE-NEXT: str wzr, [sp, #516] -; NONEON-NOSVE-NEXT: strh w8, [sp, #166] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106] -; NONEON-NOSVE-NEXT: str wzr, [sp, #540] -; NONEON-NOSVE-NEXT: strh w8, [sp, #164] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] -; NONEON-NOSVE-NEXT: strh w8, [sp, #162] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178] -; NONEON-NOSVE-NEXT: str wzr, [sp, #532] -; NONEON-NOSVE-NEXT: strh w8, [sp, #160] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111] -; NONEON-NOSVE-NEXT: str wzr, [sp, #556] -; NONEON-NOSVE-NEXT: strh w8, [sp, #174] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110] -; NONEON-NOSVE-NEXT: str wzr, [sp, #548] -; NONEON-NOSVE-NEXT: strh w8, [sp, #172] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109] -; NONEON-NOSVE-NEXT: str wzr, [sp, #700] -; NONEON-NOSVE-NEXT: strh w8, [sp, #170] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108] -; NONEON-NOSVE-NEXT: str wzr, [sp, #692] -; NONEON-NOSVE-NEXT: strh w8, [sp, #168] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] -; NONEON-NOSVE-NEXT: str wzr, [sp, #716] -; NONEON-NOSVE-NEXT: strh w8, [sp, #150] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98] -; NONEON-NOSVE-NEXT: str wzr, [sp, #708] -; NONEON-NOSVE-NEXT: strh w8, [sp, #148] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] -; NONEON-NOSVE-NEXT: strh w8, [sp, #146] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] -; NONEON-NOSVE-NEXT: str wzr, [sp, #732] -; NONEON-NOSVE-NEXT: strh w8, [sp, #144] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103] -; NONEON-NOSVE-NEXT: str wzr, [sp, #724] -; NONEON-NOSVE-NEXT: strh w8, [sp, #158] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102] -; NONEON-NOSVE-NEXT: str wzr, [sp, #748] -; NONEON-NOSVE-NEXT: strh w8, [sp, #156] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101] -; NONEON-NOSVE-NEXT: str wzr, [sp, #740] -; NONEON-NOSVE-NEXT: strh w8, [sp, #154] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] -; NONEON-NOSVE-NEXT: str wzr, [sp, #636] -; NONEON-NOSVE-NEXT: strh w8, [sp, #152] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #194] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] -; NONEON-NOSVE-NEXT: str wzr, [sp, #628] -; NONEON-NOSVE-NEXT: str w8, [sp, #276] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #192] -; NONEON-NOSVE-NEXT: str wzr, [sp, #652] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #198] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #284] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #196] -; NONEON-NOSVE-NEXT: str wzr, [sp, #644] -; NONEON-NOSVE-NEXT: str w8, [sp, #280] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #202] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #272] -; NONEON-NOSVE-NEXT: str wzr, [sp, #668] -; NONEON-NOSVE-NEXT: str w8, [sp, #292] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #200] -; NONEON-NOSVE-NEXT: str wzr, [sp, #660] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #206] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #400] -; NONEON-NOSVE-NEXT: str w8, [sp, #300] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #204] -; NONEON-NOSVE-NEXT: str wzr, [sp, #684] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #27] +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #29] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #140] +; NONEON-NOSVE-NEXT: add w8, w30, w30 +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #28] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #148] +; NONEON-NOSVE-NEXT: add w9, w29, w29 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #132] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #124] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #108] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #116] +; NONEON-NOSVE-NEXT: add w9, w25, w25 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #100] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #92] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #30] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #31] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #84] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #33] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #32] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #60] +; NONEON-NOSVE-NEXT: add w9, w6, w6 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #34] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #35] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #52] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: add w0, w16, w16 +; NONEON-NOSVE-NEXT: add w22, w18, w18 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #37] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff ; NONEON-NOSVE-NEXT: str w8, [sp, #296] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #176] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #288] -; NONEON-NOSVE-NEXT: str wzr, [sp, #676] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] -; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #180] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #186] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #416] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #240] -; NONEON-NOSVE-NEXT: str w8, [sp, #260] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #184] -; NONEON-NOSVE-NEXT: str w8, [sp, #256] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #190] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #368] -; NONEON-NOSVE-NEXT: str w8, [sp, #268] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #188] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: and w23, w0, #0xff +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #36] +; NONEON-NOSVE-NEXT: str w9, [sp, #288] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #38] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #39] +; NONEON-NOSVE-NEXT: str w9, [sp, #280] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #41] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff ; NONEON-NOSVE-NEXT: str w8, [sp, #264] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #226] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #256] -; NONEON-NOSVE-NEXT: str w8, [sp, #340] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #224] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #230] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #384] -; NONEON-NOSVE-NEXT: str w8, [sp, #348] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #228] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #234] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #336] -; NONEON-NOSVE-NEXT: str w8, [sp, #356] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #232] -; NONEON-NOSVE-NEXT: str w8, [sp, #352] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #238] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #464] -; NONEON-NOSVE-NEXT: str w8, [sp, #364] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #236] -; NONEON-NOSVE-NEXT: str w8, [sp, #360] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #210] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #352] -; NONEON-NOSVE-NEXT: str w8, [sp, #308] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #208] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #214] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #480] -; NONEON-NOSVE-NEXT: str w8, [sp, #316] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #212] -; NONEON-NOSVE-NEXT: str w8, [sp, #312] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #218] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #304] -; NONEON-NOSVE-NEXT: str w8, [sp, #324] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #216] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #222] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #432] -; NONEON-NOSVE-NEXT: str w8, [sp, #332] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #220] -; NONEON-NOSVE-NEXT: str w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #320] -; NONEON-NOSVE-NEXT: str w8, [sp, #568] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] -; NONEON-NOSVE-NEXT: str w8, [sp, #560] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #412] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #448] -; NONEON-NOSVE-NEXT: str w8, [sp, #584] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #408] -; NONEON-NOSVE-NEXT: str w8, [sp, #576] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #420] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #560] -; NONEON-NOSVE-NEXT: str w8, [sp, #600] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #416] -; NONEON-NOSVE-NEXT: str w8, [sp, #592] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #428] -; NONEON-NOSVE-NEXT: str w8, [sp, #616] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #424] -; NONEON-NOSVE-NEXT: str w8, [sp, #608] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] -; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #592] -; NONEON-NOSVE-NEXT: str w8, [sp, #504] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] -; NONEON-NOSVE-NEXT: str w8, [sp, #496] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] -; NONEON-NOSVE-NEXT: str w8, [sp, #520] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] -; NONEON-NOSVE-NEXT: str w8, [sp, #512] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] -; NONEON-NOSVE-NEXT: ldp q4, q5, [sp, #496] -; NONEON-NOSVE-NEXT: str w8, [sp, #536] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] -; NONEON-NOSVE-NEXT: str w8, [sp, #528] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] -; NONEON-NOSVE-NEXT: str w8, [sp, #552] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] -; NONEON-NOSVE-NEXT: str w8, [sp, #544] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #468] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #528] -; NONEON-NOSVE-NEXT: str w8, [sp, #696] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #464] -; NONEON-NOSVE-NEXT: str w8, [sp, #688] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #476] -; NONEON-NOSVE-NEXT: str w8, [sp, #712] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #472] -; NONEON-NOSVE-NEXT: str w8, [sp, #704] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #484] -; NONEON-NOSVE-NEXT: ldp q16, q17, [sp, #688] -; NONEON-NOSVE-NEXT: str w8, [sp, #728] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #480] -; NONEON-NOSVE-NEXT: str w8, [sp, #720] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #492] -; NONEON-NOSVE-NEXT: str w8, [sp, #744] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #488] -; NONEON-NOSVE-NEXT: str w8, [sp, #736] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #436] -; NONEON-NOSVE-NEXT: ldp q19, q20, [sp, #720] -; NONEON-NOSVE-NEXT: str w8, [sp, #632] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #432] -; NONEON-NOSVE-NEXT: str w8, [sp, #624] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #444] -; NONEON-NOSVE-NEXT: str w8, [sp, #648] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #440] -; NONEON-NOSVE-NEXT: str w8, [sp, #640] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #452] -; NONEON-NOSVE-NEXT: ldp q22, q23, [sp, #624] -; NONEON-NOSVE-NEXT: str w8, [sp, #664] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #448] -; NONEON-NOSVE-NEXT: str w8, [sp, #656] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #460] -; NONEON-NOSVE-NEXT: str w8, [sp, #680] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #456] -; NONEON-NOSVE-NEXT: str w8, [sp, #672] -; NONEON-NOSVE-NEXT: ldp q21, q18, [sp, #656] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #40] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #252] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #42] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #236] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #43] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #45] +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #228] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #44] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #220] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #204] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #212] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #196] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #188] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: and w22, w22, #0xff +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp wzr, w22, [sp, #164] +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: stp wzr, w23, [sp, #156] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #180] +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: str wzr, [sp, #276] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144] +; NONEON-NOSVE-NEXT: str wzr, [sp, #284] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112] +; NONEON-NOSVE-NEXT: str wzr, [sp, #260] +; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #80] +; NONEON-NOSVE-NEXT: str wzr, [sp, #268] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #48] +; NONEON-NOSVE-NEXT: str w8, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q17, q16, [sp, #272] +; NONEON-NOSVE-NEXT: ldp q18, q21, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q20, q19, [sp, #240] +; NONEON-NOSVE-NEXT: ldp q23, q22, [sp, #208] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #384] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #368] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #352] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #336] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #320] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192] ; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224] -; NONEON-NOSVE-NEXT: add sp, sp, #752 -; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #400 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -3653,91 +2494,70 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v16i16_v16i32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: stp q1, q0, [sp] -; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] -; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] -; NONEON-NOSVE-NEXT: ldrh w5, [sp] -; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] -; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] -; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w3, [sp] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #14] ; NONEON-NOSVE-NEXT: add w14, w14, w14 -; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] -; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] -; NONEON-NOSVE-NEXT: strh w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: add w16, w16, w16 +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: and w17, w17, #0xffff +; NONEON-NOSVE-NEXT: and w16, w16, #0xffff +; NONEON-NOSVE-NEXT: stp w17, w14, [sp, #56] ; NONEON-NOSVE-NEXT: add w14, w3, w3 -; NONEON-NOSVE-NEXT: strh w13, [sp, #44] -; NONEON-NOSVE-NEXT: add w13, w5, w5 +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #8] +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #10] +; NONEON-NOSVE-NEXT: add w17, w5, w5 +; NONEON-NOSVE-NEXT: stp w14, w16, [sp, #48] +; NONEON-NOSVE-NEXT: add w16, w4, w4 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w14, [sp, #42] -; NONEON-NOSVE-NEXT: add w14, w4, w4 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] -; NONEON-NOSVE-NEXT: strh w13, [sp, #40] -; NONEON-NOSVE-NEXT: add w13, w2, w2 -; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] -; NONEON-NOSVE-NEXT: strh w14, [sp, #38] -; NONEON-NOSVE-NEXT: add w14, w0, w0 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strh w13, [sp, #36] -; NONEON-NOSVE-NEXT: add w13, w18, w18 -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: strh w14, [sp, #34] ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] ; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] -; NONEON-NOSVE-NEXT: strh w13, [sp, #32] ; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] -; NONEON-NOSVE-NEXT: strh w9, [sp, #50] -; NONEON-NOSVE-NEXT: add w14, w17, w17 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #22] +; NONEON-NOSVE-NEXT: and w14, w17, #0xffff +; NONEON-NOSVE-NEXT: and w16, w16, #0xffff +; NONEON-NOSVE-NEXT: add w17, w2, w2 +; NONEON-NOSVE-NEXT: stp w16, w14, [sp, #40] +; NONEON-NOSVE-NEXT: add w14, w0, w0 +; NONEON-NOSVE-NEXT: and w16, w17, #0xffff +; NONEON-NOSVE-NEXT: add w17, w18, w18 +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: add w13, w13, w13 ; NONEON-NOSVE-NEXT: add w12, w12, w12 -; NONEON-NOSVE-NEXT: strh w8, [sp, #48] -; NONEON-NOSVE-NEXT: add w13, w16, w16 ; NONEON-NOSVE-NEXT: add w11, w11, w11 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] ; NONEON-NOSVE-NEXT: add w10, w10, w10 -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76] -; NONEON-NOSVE-NEXT: strh w14, [sp, #62] -; NONEON-NOSVE-NEXT: add w14, w15, w15 -; NONEON-NOSVE-NEXT: strh w13, [sp, #60] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72] -; NONEON-NOSVE-NEXT: strh w14, [sp, #58] -; NONEON-NOSVE-NEXT: strh w12, [sp, #56] -; NONEON-NOSVE-NEXT: strh w11, [sp, #54] -; NONEON-NOSVE-NEXT: strh w10, [sp, #52] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #94] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #84] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp w14, w16, [sp, #32] +; NONEON-NOSVE-NEXT: and w14, w17, #0xffff +; NONEON-NOSVE-NEXT: and w15, w15, #0xffff +; NONEON-NOSVE-NEXT: and w13, w13, #0xffff +; NONEON-NOSVE-NEXT: and w12, w12, #0xffff +; NONEON-NOSVE-NEXT: and w11, w11, #0xffff +; NONEON-NOSVE-NEXT: and w10, w10, #0xffff +; NONEON-NOSVE-NEXT: and w9, w9, #0xffff +; NONEON-NOSVE-NEXT: and w8, w8, #0xffff +; NONEON-NOSVE-NEXT: stp w15, w14, [sp, #88] +; NONEON-NOSVE-NEXT: stp w12, w13, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32] +; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -3763,26 +2583,20 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v4i16_v4i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #24] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %b = zext <4 x i16> %a to <4 x i64> store <4 x i64>%b, ptr %out @@ -3808,43 +2622,31 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v8i16_v8i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #88] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #72] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #56] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #40] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #152] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #136] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #104] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %b = zext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out @@ -3883,144 +2685,85 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v16i16_v16i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #368 -; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: str wzr, [sp, #268] -; NONEON-NOSVE-NEXT: str wzr, [sp, #260] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload -; NONEON-NOSVE-NEXT: str wzr, [sp, #284] +; NONEON-NOSVE-NEXT: str wzr, [sp, #92] +; NONEON-NOSVE-NEXT: str wzr, [sp, #156] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp] -; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] -; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] -; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] -; NONEON-NOSVE-NEXT: ldrh w5, [sp] -; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] -; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] -; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: ldrh w14, [sp] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #10] ; NONEON-NOSVE-NEXT: add w14, w14, w14 -; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] -; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] -; NONEON-NOSVE-NEXT: strh w14, [sp, #54] -; NONEON-NOSVE-NEXT: add w14, w3, w3 -; NONEON-NOSVE-NEXT: strh w13, [sp, #52] -; NONEON-NOSVE-NEXT: add w13, w5, w5 -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] -; NONEON-NOSVE-NEXT: strh w14, [sp, #50] -; NONEON-NOSVE-NEXT: add w14, w4, w4 -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] -; NONEON-NOSVE-NEXT: strh w13, [sp, #48] -; NONEON-NOSVE-NEXT: add w13, w2, w2 -; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] -; NONEON-NOSVE-NEXT: strh w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w15, w15, w15 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #12] +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: and w15, w15, #0xffff +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #14] +; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #84] +; NONEON-NOSVE-NEXT: add w15, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #76] +; NONEON-NOSVE-NEXT: add w14, w5, w5 +; NONEON-NOSVE-NEXT: and w15, w15, #0xffff +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #60] +; NONEON-NOSVE-NEXT: add w15, w3, w3 +; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #68] +; NONEON-NOSVE-NEXT: add w14, w2, w2 +; NONEON-NOSVE-NEXT: and w15, w15, #0xffff +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #18] +; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #52] +; NONEON-NOSVE-NEXT: add w15, w18, w18 +; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #44] ; NONEON-NOSVE-NEXT: add w14, w0, w0 -; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: strh w13, [sp, #44] -; NONEON-NOSVE-NEXT: add w13, w18, w18 -; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: strh w14, [sp, #42] -; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] -; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] -; NONEON-NOSVE-NEXT: strh w13, [sp, #40] -; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] -; NONEON-NOSVE-NEXT: add w14, w17, w17 -; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] -; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: and w15, w15, #0xffff +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #36] +; NONEON-NOSVE-NEXT: add w14, w16, w16 ; NONEON-NOSVE-NEXT: add w12, w12, w12 -; NONEON-NOSVE-NEXT: strh w8, [sp, #56] -; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: str w15, [sp, #32] +; NONEON-NOSVE-NEXT: add w15, w17, w17 ; NONEON-NOSVE-NEXT: add w10, w10, w10 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: add w13, w16, w16 -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #82] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80] -; NONEON-NOSVE-NEXT: strh w14, [sp, #70] -; NONEON-NOSVE-NEXT: add w14, w15, w15 -; NONEON-NOSVE-NEXT: strh w13, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #84] -; NONEON-NOSVE-NEXT: strh w14, [sp, #66] -; NONEON-NOSVE-NEXT: strh w12, [sp, #64] -; NONEON-NOSVE-NEXT: strh w11, [sp, #62] -; NONEON-NOSVE-NEXT: strh w10, [sp, #60] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #74] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72] -; NONEON-NOSVE-NEXT: str wzr, [sp, #276] -; NONEON-NOSVE-NEXT: str wzr, [sp, #332] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #78] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #98] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #96] -; NONEON-NOSVE-NEXT: str wzr, [sp, #324] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #102] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #100] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #184] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #104] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #90] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88] -; NONEON-NOSVE-NEXT: str wzr, [sp, #348] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #94] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92] -; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #168] -; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] -; NONEON-NOSVE-NEXT: str d0, [sp, #360] -; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] -; NONEON-NOSVE-NEXT: str wzr, [sp, #340] -; NONEON-NOSVE-NEXT: str w9, [sp, #264] -; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #252] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #192] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: str d2, [sp, #200] -; NONEON-NOSVE-NEXT: str w9, [sp, #280] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] -; NONEON-NOSVE-NEXT: str wzr, [sp, #300] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256] -; NONEON-NOSVE-NEXT: str wzr, [sp, #292] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] -; NONEON-NOSVE-NEXT: str wzr, [sp, #316] -; NONEON-NOSVE-NEXT: str wzr, [sp, #308] -; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244] -; NONEON-NOSVE-NEXT: str w8, [sp, #240] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216] -; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #224] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] -; NONEON-NOSVE-NEXT: str w9, [sp, #328] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200] -; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #320] -; NONEON-NOSVE-NEXT: str w9, [sp, #296] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208] -; NONEON-NOSVE-NEXT: str w9, [sp, #312] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #288] +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: and w14, w14, #0xffff +; NONEON-NOSVE-NEXT: and w15, w15, #0xffff +; NONEON-NOSVE-NEXT: and w13, w13, #0xffff +; NONEON-NOSVE-NEXT: and w12, w12, #0xffff +; NONEON-NOSVE-NEXT: and w10, w10, #0xffff +; NONEON-NOSVE-NEXT: and w11, w11, #0xffff +; NONEON-NOSVE-NEXT: and w9, w9, #0xffff +; NONEON-NOSVE-NEXT: and w8, w8, #0xffff +; NONEON-NOSVE-NEXT: stp wzr, w15, [sp, #148] +; NONEON-NOSVE-NEXT: stp wzr, w14, [sp, #140] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp wzr, w12, [sp, #132] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #32] +; NONEON-NOSVE-NEXT: stp wzr, w13, [sp, #124] +; NONEON-NOSVE-NEXT: stp wzr, w11, [sp, #116] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #128] +; NONEON-NOSVE-NEXT: stp wzr, w10, [sp, #108] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #100] +; NONEON-NOSVE-NEXT: str w8, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #96] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: add sp, sp, #160 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -4082,47 +2825,34 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) { ; ; NONEON-NOSVE-LABEL: zext_v8i32_v8i64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] -; NONEON-NOSVE-NEXT: stp q1, q0, [sp] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] -; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8] -; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #16] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: add w10, w15, w15 +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #40] +; NONEON-NOSVE-NEXT: add w10, w14, w14 +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: add w10, w13, w13 ; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #88] +; NONEON-NOSVE-NEXT: add w10, w12, w12 ; NONEON-NOSVE-NEXT: add w8, w8, w8 -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: add w9, w13, w13 -; NONEON-NOSVE-NEXT: add w8, w12, w12 -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: add w9, w15, w15 -; NONEON-NOSVE-NEXT: add w8, w14, w14 -; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: add w9, w11, w11 -; NONEON-NOSVE-NEXT: add w8, w10, w10 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #104] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #152] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #136] -; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp w11, wzr, [sp, #56] +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #32] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index 46a2ce6ed710..7df362826d05 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -452,29 +452,23 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: ldr d0, [x0] ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %res = uitofp <4 x i16> %op1 to <4 x double> @@ -506,49 +500,36 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldr q0, [x0] -; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x double> @@ -602,92 +583,63 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #336 -; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q0, q1, [sp] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] -; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: str d1, [sp, #328] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] -; NONEON-NOSVE-NEXT: str d0, [sp, #168] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 -; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] ; NONEON-NOSVE-NEXT: ucvtf d1, w8 -; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] -; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] -; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] ; NONEON-NOSVE-NEXT: ucvtf d0, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] -; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #160] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #128] ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: add sp, sp, #192 ; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x double> @@ -1891,29 +1843,23 @@ define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 ; NONEON-NOSVE-NEXT: ldr d0, [x0] ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: scvtf d1, w8 ; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %res = sitofp <4 x i16> %op1 to <4 x double> @@ -1945,49 +1891,36 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 ; NONEON-NOSVE-NEXT: ldr q0, [x0] -; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: scvtf d1, w8 ; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] -; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] -; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x double> @@ -2041,92 +1974,63 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) { ; ; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #336 -; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill -; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 -; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q0, q1, [sp] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #50] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #54] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #58] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] -; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] -; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #62] -; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] -; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] -; NONEON-NOSVE-NEXT: str d1, [sp, #328] -; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160] -; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] -; NONEON-NOSVE-NEXT: str d0, [sp, #168] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: scvtf d1, w9 -; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #42] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] ; NONEON-NOSVE-NEXT: scvtf d1, w8 -; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] -; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] -; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] -; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #34] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #32] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] -; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] -; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #36] ; NONEON-NOSVE-NEXT: scvtf d0, w8 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] -; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #160] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #128] ; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] ; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1] ; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] -; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: add sp, sp, #192 ; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x double> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index e8c9704940c7..e6c6003ee6c6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -28,23 +28,17 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind { ; ; NONEON-NOSVE-LABEL: alloc_v4i8: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #48 -; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: mov x19, x0 -; NONEON-NOSVE-NEXT: add x0, sp, #28 +; NONEON-NOSVE-NEXT: add x0, sp, #12 ; NONEON-NOSVE-NEXT: bl def -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] -; NONEON-NOSVE-NEXT: strh w8, [sp, #12] -; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] -; NONEON-NOSVE-NEXT: strh w8, [sp, #8] -; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] -; NONEON-NOSVE-NEXT: str d0, [sp, #16] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] -; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] ; NONEON-NOSVE-NEXT: strb w8, [x19, #1] ; NONEON-NOSVE-NEXT: strb w9, [x19] -; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #32 ; NONEON-NOSVE-NEXT: ret %alloc = alloca [4 x i8] call void @def(ptr %alloc) diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll index 75c5bee2ae0a..68a9dff81232 100644 --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -1245,54 +1245,48 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) { ; CHECK-NEXT: ldr x9, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: and w11, w9, #0xf -; CHECK-NEXT: ubfx w10, w9, #4, #4 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: mov.b v1[1], w10 -; CHECK-NEXT: ubfx w10, w9, #8, #4 -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ubfx w10, w9, #12, #4 -; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ubfx w10, w9, #16, #4 -; CHECK-NEXT: mov.b v1[4], w10 -; CHECK-NEXT: ubfx w10, w9, #20, #4 -; CHECK-NEXT: mov.b v1[5], w10 -; CHECK-NEXT: ubfx w10, w9, #24, #4 -; CHECK-NEXT: mov.b v1[6], w10 -; CHECK-NEXT: lsr w10, w9, #28 -; CHECK-NEXT: mov.b v1[7], w10 -; CHECK-NEXT: ubfx x10, x9, #32, #4 -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ubfx x10, x9, #36, #4 -; CHECK-NEXT: mov.b v1[9], w10 -; CHECK-NEXT: ubfx x10, x9, #40, #4 -; CHECK-NEXT: mov.b v1[10], w10 -; CHECK-NEXT: ubfx x10, x9, #44, #4 -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ubfx x10, x9, #48, #4 -; CHECK-NEXT: mov.b v1[12], w10 +; CHECK-NEXT: ubfx x12, x9, #48, #4 ; CHECK-NEXT: ubfx x10, x9, #52, #4 -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ubfx x10, x9, #56, #4 -; CHECK-NEXT: lsr x9, x9, #60 -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: ext.16b v2, v1, v1, #8 -; CHECK-NEXT: zip2.8b v3, v1, v0 -; CHECK-NEXT: zip1.8b v1, v1, v0 -; CHECK-NEXT: zip2.8b v4, v2, v0 -; CHECK-NEXT: zip1.8b v2, v2, v0 -; CHECK-NEXT: ushll.4s v3, v3, #0 +; CHECK-NEXT: ubfx x14, x9, #32, #4 +; CHECK-NEXT: ubfx w15, w9, #16, #4 +; CHECK-NEXT: ubfx x11, x9, #36, #4 +; CHECK-NEXT: ubfx w13, w9, #20, #4 +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: fmov s2, w14 +; CHECK-NEXT: ubfx w12, w9, #4, #4 +; CHECK-NEXT: fmov s3, w15 +; CHECK-NEXT: mov.h v1[1], w10 +; CHECK-NEXT: and w10, w9, #0xf +; CHECK-NEXT: mov.h v2[1], w11 +; CHECK-NEXT: fmov s4, w10 +; CHECK-NEXT: ubfx x11, x9, #56, #4 +; CHECK-NEXT: mov.h v3[1], w13 +; CHECK-NEXT: ubfx x10, x9, #40, #4 +; CHECK-NEXT: mov.h v4[1], w12 +; CHECK-NEXT: ubfx w12, w9, #24, #4 +; CHECK-NEXT: mov.h v1[2], w11 +; CHECK-NEXT: ubfx w11, w9, #8, #4 +; CHECK-NEXT: mov.h v2[2], w10 +; CHECK-NEXT: lsr x10, x9, #60 +; CHECK-NEXT: mov.h v3[2], w12 +; CHECK-NEXT: ubfx x12, x9, #44, #4 +; CHECK-NEXT: mov.h v4[2], w11 +; CHECK-NEXT: lsr w11, w9, #28 +; CHECK-NEXT: ubfx w9, w9, #12, #4 +; CHECK-NEXT: mov.h v1[3], w10 +; CHECK-NEXT: mov.h v2[3], w12 +; CHECK-NEXT: mov.h v3[3], w11 +; CHECK-NEXT: mov.h v4[3], w9 ; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: and.16b v3, v3, v0 -; CHECK-NEXT: ushll.4s v4, v4, #0 ; CHECK-NEXT: ushll.4s v2, v2, #0 +; CHECK-NEXT: ushll.4s v3, v3, #0 +; CHECK-NEXT: ushll.4s v4, v4, #0 ; CHECK-NEXT: and.16b v1, v1, v0 -; CHECK-NEXT: and.16b v4, v4, v0 ; CHECK-NEXT: and.16b v2, v2, v0 -; CHECK-NEXT: stp q1, q3, [x1] -; CHECK-NEXT: stp q2, q4, [x1, #32] -; CHECK-NEXT: add x1, x1, #64 +; CHECK-NEXT: and.16b v3, v3, v0 +; CHECK-NEXT: and.16b v4, v4, v0 +; CHECK-NEXT: stp q2, q1, [x1, #32] +; CHECK-NEXT: stp q4, q3, [x1], #64 ; CHECK-NEXT: b.ne LBB13_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -1306,59 +1300,54 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) { ; CHECK-BE-NEXT: ldr x9, [x0, x8] ; CHECK-BE-NEXT: add x8, x8, #16 ; CHECK-BE-NEXT: cmp x8, #128 -; CHECK-BE-NEXT: lsr x10, x9, #60 -; CHECK-BE-NEXT: ubfx x11, x9, #56, #4 -; CHECK-BE-NEXT: fmov s1, w10 -; CHECK-BE-NEXT: ubfx x10, x9, #52, #4 -; CHECK-BE-NEXT: mov v1.b[1], w11 -; CHECK-BE-NEXT: mov v1.b[2], w10 -; CHECK-BE-NEXT: ubfx x10, x9, #48, #4 -; CHECK-BE-NEXT: mov v1.b[3], w10 -; CHECK-BE-NEXT: ubfx x10, x9, #44, #4 -; CHECK-BE-NEXT: mov v1.b[4], w10 -; CHECK-BE-NEXT: ubfx x10, x9, #40, #4 -; CHECK-BE-NEXT: mov v1.b[5], w10 -; CHECK-BE-NEXT: ubfx x10, x9, #36, #4 -; CHECK-BE-NEXT: mov v1.b[6], w10 -; CHECK-BE-NEXT: ubfx x10, x9, #32, #4 -; CHECK-BE-NEXT: mov v1.b[7], w10 -; CHECK-BE-NEXT: lsr w10, w9, #28 -; CHECK-BE-NEXT: mov v1.b[8], w10 -; CHECK-BE-NEXT: ubfx w10, w9, #24, #4 -; CHECK-BE-NEXT: mov v1.b[9], w10 -; CHECK-BE-NEXT: ubfx w10, w9, #20, #4 -; CHECK-BE-NEXT: mov v1.b[10], w10 -; CHECK-BE-NEXT: ubfx w10, w9, #16, #4 -; CHECK-BE-NEXT: mov v1.b[11], w10 -; CHECK-BE-NEXT: ubfx w10, w9, #12, #4 -; CHECK-BE-NEXT: mov v1.b[12], w10 +; CHECK-BE-NEXT: ubfx w12, w9, #12, #4 +; CHECK-BE-NEXT: lsr w14, w9, #28 ; CHECK-BE-NEXT: ubfx w10, w9, #8, #4 -; CHECK-BE-NEXT: mov v1.b[13], w10 -; CHECK-BE-NEXT: ubfx w10, w9, #4, #4 -; CHECK-BE-NEXT: and w9, w9, #0xf -; CHECK-BE-NEXT: mov v1.b[14], w10 +; CHECK-BE-NEXT: ubfx x15, x9, #44, #4 +; CHECK-BE-NEXT: ubfx w11, w9, #24, #4 +; CHECK-BE-NEXT: ubfx x13, x9, #40, #4 +; CHECK-BE-NEXT: fmov s1, w12 +; CHECK-BE-NEXT: lsr x12, x9, #60 +; CHECK-BE-NEXT: fmov s2, w14 +; CHECK-BE-NEXT: fmov s3, w15 +; CHECK-BE-NEXT: fmov s4, w12 +; CHECK-BE-NEXT: ubfx w12, w9, #20, #4 +; CHECK-BE-NEXT: mov v1.h[1], w10 +; CHECK-BE-NEXT: ubfx x10, x9, #56, #4 +; CHECK-BE-NEXT: mov v2.h[1], w11 +; CHECK-BE-NEXT: ubfx w11, w9, #4, #4 +; CHECK-BE-NEXT: mov v3.h[1], w13 +; CHECK-BE-NEXT: mov v4.h[1], w10 +; CHECK-BE-NEXT: ubfx x10, x9, #36, #4 +; CHECK-BE-NEXT: mov v1.h[2], w11 +; CHECK-BE-NEXT: ubfx x11, x9, #52, #4 +; CHECK-BE-NEXT: mov v2.h[2], w12 +; CHECK-BE-NEXT: mov v3.h[2], w10 +; CHECK-BE-NEXT: and w10, w9, #0xf +; CHECK-BE-NEXT: ubfx w12, w9, #16, #4 +; CHECK-BE-NEXT: mov v4.h[2], w11 +; CHECK-BE-NEXT: ubfx x11, x9, #32, #4 +; CHECK-BE-NEXT: ubfx x9, x9, #48, #4 +; CHECK-BE-NEXT: mov v1.h[3], w10 +; CHECK-BE-NEXT: mov v2.h[3], w12 ; CHECK-BE-NEXT: add x10, x1, #32 -; CHECK-BE-NEXT: mov v1.b[15], w9 -; CHECK-BE-NEXT: add x9, x1, #16 -; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: zip2 v3.8b, v1.8b, v0.8b -; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b -; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b -; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b -; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b -; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b -; CHECK-BE-NEXT: st1 { v3.4s }, [x9] +; CHECK-BE-NEXT: mov v3.h[3], w11 +; CHECK-BE-NEXT: mov v4.h[3], w9 ; CHECK-BE-NEXT: add x9, x1, #48 -; CHECK-BE-NEXT: and v4.16b, v4.16b, v0.16b +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0 +; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b ; CHECK-BE-NEXT: and v2.16b, v2.16b, v0.16b -; CHECK-BE-NEXT: st1 { v1.4s }, [x1] -; CHECK-BE-NEXT: add x1, x1, #64 -; CHECK-BE-NEXT: st1 { v4.4s }, [x9] +; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b +; CHECK-BE-NEXT: and v4.16b, v4.16b, v0.16b +; CHECK-BE-NEXT: st1 { v1.4s }, [x9] +; CHECK-BE-NEXT: add x9, x1, #16 ; CHECK-BE-NEXT: st1 { v2.4s }, [x10] +; CHECK-BE-NEXT: st1 { v4.4s }, [x1] +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: st1 { v3.4s }, [x9] ; CHECK-BE-NEXT: b.ne .LBB13_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret