llvm-project/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
Sander de Smalen 61510b51c3 Revert "[AArch64] Enable subreg liveness tracking by default."
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7.

Some issues were discovered with the bootstrap builds, which
seem like they were caused by this commit. I'm reverting to investigate.
2024-12-12 17:22:15 +00:00

965 lines
32 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Test efficient codegen of vector extends from a legal type up to 128-bit,
; 256-bit and 512-bit vector types.
; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v64i1
;-----
; Vectors of i16.
;-----
; zext <8 x i8> to <8 x i16>: both selectors are expected to emit a single
; ushll with a zero shift amount.
define <8 x i16> @func1(<8 x i8> %v0) nounwind {
; CHECK-LABEL: func1:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i16>
ret <8 x i16> %r
}
; sext <8 x i8> to <8 x i16>: both selectors are expected to emit a single
; sshll with a zero shift amount.
define <8 x i16> @func2(<8 x i8> %v0) nounwind {
; CHECK-LABEL: func2:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll.8h v0, v0, #0
; CHECK-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i16>
ret <8 x i16> %r
}
; zext <16 x i8> to <16 x i16>: the result spans two registers (v0/v1), built
; with a ushll/ushll2 pair. The GlobalISel output writes the ushll result to v2
; first and therefore needs an extra mov into v0.
define <16 x i16> @func3(<16 x i8> %v0) nounwind {
; CHECK-SD-LABEL: func3:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll2.8h v1, v0, #0
; CHECK-SD-NEXT: ushll.8h v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: func3:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.8h v2, v0, #0
; CHECK-GI-NEXT: ushll2.8h v1, v0, #0
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = zext <16 x i8> %v0 to <16 x i16>
ret <16 x i16> %r
}
; sext <16 x i8> to <16 x i16>: signed counterpart of func3, using an
; sshll/sshll2 pair. The GlobalISel output again goes through v2 and needs an
; extra mov into v0.
define <16 x i16> @func4(<16 x i8> %v0) nounwind {
; CHECK-SD-LABEL: func4:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll2.8h v1, v0, #0
; CHECK-SD-NEXT: sshll.8h v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: func4:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v2, v0, #0
; CHECK-GI-NEXT: sshll2.8h v1, v0, #0
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = sext <16 x i8> %v0 to <16 x i16>
ret <16 x i16> %r
}
;-----
; Vectors of i32.
;-----
; zext <4 x i16> to <4 x i32>: both selectors are expected to emit a single
; ushll.4s with a zero shift amount.
define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
; CHECK-LABEL: afunc1:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ret
%r = zext <4 x i16> %v0 to <4 x i32>
ret <4 x i32> %r
}
; sext <4 x i16> to <4 x i32>: both selectors are expected to emit a single
; sshll.4s with a zero shift amount.
define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
; CHECK-LABEL: afunc2:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: ret
%r = sext <4 x i16> %v0 to <4 x i32>
ret <4 x i32> %r
}
; zext <8 x i16> to <8 x i32>: the result spans two registers (v0/v1), built
; with a ushll/ushll2 pair. The GlobalISel output writes the ushll result to v2
; first and therefore needs an extra mov into v0.
define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
; CHECK-SD-LABEL: afunc3:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: afunc3:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v2, v0, #0
; CHECK-GI-NEXT: ushll2.4s v1, v0, #0
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = zext <8 x i16> %v0 to <8 x i32>
ret <8 x i32> %r
}
; sext <8 x i16> to <8 x i32>: signed counterpart of afunc3, using an
; sshll/sshll2 pair. The GlobalISel output again goes through v2 and needs an
; extra mov into v0.
define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
; CHECK-SD-LABEL: afunc4:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
; CHECK-SD-NEXT: sshll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: afunc4:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.4s v2, v0, #0
; CHECK-GI-NEXT: sshll2.4s v1, v0, #0
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = sext <8 x i16> %v0 to <8 x i32>
ret <8 x i32> %r
}
; zext <8 x i8> to <8 x i32>: a two-step widening. One ushll.8h takes the
; elements to 16 bits, then a ushll/ushll2 pair produces the two 4s result
; registers. The selectors differ only in register assignment and ordering.
define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
; CHECK-SD-LABEL: bfunc1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll.8h v0, v0, #0
; CHECK-SD-NEXT: ushll2.4s v1, v0, #0
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bfunc1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.8h v1, v0, #0
; CHECK-GI-NEXT: ushll.4s v0, v1, #0
; CHECK-GI-NEXT: ushll2.4s v1, v1, #0
; CHECK-GI-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i32>
ret <8 x i32> %r
}
; sext <8 x i8> to <8 x i32>: signed counterpart of bfunc1. One sshll.8h takes
; the elements to 16 bits, then an sshll/sshll2 pair produces the two 4s result
; registers.
define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
; CHECK-SD-LABEL: bfunc2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll.8h v0, v0, #0
; CHECK-SD-NEXT: sshll2.4s v1, v0, #0
; CHECK-SD-NEXT: sshll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bfunc2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v1, v0, #0
; CHECK-GI-NEXT: sshll.4s v0, v1, #0
; CHECK-GI-NEXT: sshll2.4s v1, v1, #0
; CHECK-GI-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i32>
ret <8 x i32> %r
}
;-----
; Vectors of i64.
;-----
; zext <4 x i32> to <4 x i64>: the result spans two registers (v0/v1), built
; with a ushll/ushll2 pair. The GlobalISel output writes the ushll result to v2
; first and therefore needs an extra mov into v0.
define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
; CHECK-SD-LABEL: zfunc1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
; CHECK-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: zfunc1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.2d v2, v0, #0
; CHECK-GI-NEXT: ushll2.2d v1, v0, #0
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = zext <4 x i32> %v0 to <4 x i64>
ret <4 x i64> %r
}
; sext <4 x i32> to <4 x i64>: signed counterpart of zfunc1, using an
; sshll/sshll2 pair. The GlobalISel output again goes through v2 and needs an
; extra mov into v0.
define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
; CHECK-SD-LABEL: zfunc2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
; CHECK-SD-NEXT: sshll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: zfunc2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.2d v2, v0, #0
; CHECK-GI-NEXT: sshll2.2d v1, v0, #0
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = sext <4 x i32> %v0 to <4 x i64>
ret <4 x i64> %r
}
; zext <4 x i16> to <4 x i64>: a two-step widening. One ushll.4s takes the
; elements to 32 bits, then a ushll/ushll2 pair produces the two 2d result
; registers. The selectors differ only in register assignment and ordering.
define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
; CHECK-SD-LABEL: bfunc3:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
; CHECK-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bfunc3:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v1, v0, #0
; CHECK-GI-NEXT: ushll.2d v0, v1, #0
; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
; CHECK-GI-NEXT: ret
%r = zext <4 x i16> %v0 to <4 x i64>
ret <4 x i64> %r
}
; sext <4 x i16> to <4 x i64>: signed counterpart of bfunc3. One sshll.4s takes
; the elements to 32 bits, then an sshll/sshll2 pair produces the two 2d result
; registers.
define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
; CHECK-SD-LABEL: cfunc4:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll.4s v0, v0, #0
; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
; CHECK-SD-NEXT: sshll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: cfunc4:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.4s v1, v0, #0
; CHECK-GI-NEXT: sshll.2d v0, v1, #0
; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
; CHECK-GI-NEXT: ret
%r = sext <4 x i16> %v0 to <4 x i64>
ret <4 x i64> %r
}
; zext <4 x i8> to <4 x i64>: the expected code handles the argument as four
; 16-bit lanes. The default selector clears the upper byte of each lane up front
; (bic.4h) and then widens with ushll/ushll2; GlobalISel widens first and masks
; both 2d halves with 0xff afterwards.
define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
; CHECK-SD-LABEL: zext_v4i8_to_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
; CHECK-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: movi.2d v1, #0x000000000000ff
; CHECK-GI-NEXT: ushll.2d v2, v0, #0
; CHECK-GI-NEXT: ushll2.2d v3, v0, #0
; CHECK-GI-NEXT: and.16b v0, v2, v1
; CHECK-GI-NEXT: and.16b v1, v3, v1
; CHECK-GI-NEXT: ret
%r = zext <4 x i8> %v0 to <4 x i64>
ret <4 x i64> %r
}
; sext <4 x i8> to <4 x i64>: both selectors widen the lanes to 64 bits with
; ushll/ushll2 and then sign-extend from bit 7 in each lane with a shl #56 /
; sshr #56 pair; only the register assignment differs between them.
define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
; CHECK-SD-LABEL: sext_v4i8_to_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ushll.2d v1, v0, #0
; CHECK-SD-NEXT: ushll2.2d v0, v0, #0
; CHECK-SD-NEXT: shl.2d v0, v0, #56
; CHECK-SD-NEXT: shl.2d v2, v1, #56
; CHECK-SD-NEXT: sshr.2d v1, v0, #56
; CHECK-SD-NEXT: sshr.2d v0, v2, #56
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: ushll.2d v1, v0, #0
; CHECK-GI-NEXT: ushll2.2d v0, v0, #0
; CHECK-GI-NEXT: shl.2d v1, v1, #56
; CHECK-GI-NEXT: shl.2d v2, v0, #56
; CHECK-GI-NEXT: sshr.2d v0, v1, #56
; CHECK-GI-NEXT: sshr.2d v1, v2, #56
; CHECK-GI-NEXT: ret
%r = sext <4 x i8> %v0 to <4 x i64>
ret <4 x i64> %r
}
; zext <8 x i8> to <8 x i64>: a three-level widening (8h, then 4s, then 2d)
; using ushll/ushll2 at each level, producing four result registers v0-v3.
; The selectors differ only in register assignment and instruction order.
define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
; CHECK-SD-LABEL: zext_v8i8_to_v8i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll.8h v0, v0, #0
; CHECK-SD-NEXT: ushll.4s v1, v0, #0
; CHECK-SD-NEXT: ushll2.4s v2, v0, #0
; CHECK-SD-NEXT: ushll.2d v0, v1, #0
; CHECK-SD-NEXT: ushll2.2d v3, v2, #0
; CHECK-SD-NEXT: ushll2.2d v1, v1, #0
; CHECK-SD-NEXT: ushll.2d v2, v2, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.8h v0, v0, #0
; CHECK-GI-NEXT: ushll.4s v1, v0, #0
; CHECK-GI-NEXT: ushll2.4s v3, v0, #0
; CHECK-GI-NEXT: ushll.2d v0, v1, #0
; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
; CHECK-GI-NEXT: ushll.2d v2, v3, #0
; CHECK-GI-NEXT: ushll2.2d v3, v3, #0
; CHECK-GI-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
}
; sext <8 x i8> to <8 x i64>: signed counterpart of zext_v8i8_to_v8i64. A
; three-level widening (8h, then 4s, then 2d) using sshll/sshll2 at each level,
; producing four result registers v0-v3.
define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
; CHECK-SD-LABEL: sext_v8i8_to_v8i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll.8h v0, v0, #0
; CHECK-SD-NEXT: sshll.4s v1, v0, #0
; CHECK-SD-NEXT: sshll2.4s v2, v0, #0
; CHECK-SD-NEXT: sshll.2d v0, v1, #0
; CHECK-SD-NEXT: sshll2.2d v3, v2, #0
; CHECK-SD-NEXT: sshll2.2d v1, v1, #0
; CHECK-SD-NEXT: sshll.2d v2, v2, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v0, v0, #0
; CHECK-GI-NEXT: sshll.4s v1, v0, #0
; CHECK-GI-NEXT: sshll2.4s v3, v0, #0
; CHECK-GI-NEXT: sshll.2d v0, v1, #0
; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
; CHECK-GI-NEXT: sshll.2d v2, v3, #0
; CHECK-GI-NEXT: sshll2.2d v3, v3, #0
; CHECK-GI-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
}
; Extends of vectors of i1.
; zext <32 x i1> to <32 x i8>: in the expected output the first eight elements
; come from w0-w7 and the remaining ones are loaded from the stack. Each element
; is inserted as a byte lane of v0 or v1, and both vectors are then masked with
; a splat of 1. One set of checks covers both RUN lines because GlobalISel falls
; back for this function (see the fallback warning checked at the top of the
; file).
define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
; CHECK-LABEL: zext_v32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [sp, #64]
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ldr w9, [sp, #72]
; CHECK-NEXT: movi.16b v2, #1
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: ldr w8, [sp, #80]
; CHECK-NEXT: mov.b v0[1], w1
; CHECK-NEXT: mov.b v1[1], w9
; CHECK-NEXT: ldr w9, [sp]
; CHECK-NEXT: mov.b v0[2], w2
; CHECK-NEXT: mov.b v1[2], w8
; CHECK-NEXT: ldr w8, [sp, #88]
; CHECK-NEXT: mov.b v0[3], w3
; CHECK-NEXT: mov.b v1[3], w8
; CHECK-NEXT: ldr w8, [sp, #96]
; CHECK-NEXT: mov.b v0[4], w4
; CHECK-NEXT: mov.b v1[4], w8
; CHECK-NEXT: ldr w8, [sp, #104]
; CHECK-NEXT: mov.b v0[5], w5
; CHECK-NEXT: mov.b v1[5], w8
; CHECK-NEXT: ldr w8, [sp, #112]
; CHECK-NEXT: mov.b v0[6], w6
; CHECK-NEXT: mov.b v1[6], w8
; CHECK-NEXT: ldr w8, [sp, #120]
; CHECK-NEXT: mov.b v0[7], w7
; CHECK-NEXT: mov.b v1[7], w8
; CHECK-NEXT: ldr w8, [sp, #128]
; CHECK-NEXT: mov.b v0[8], w9
; CHECK-NEXT: ldr w9, [sp, #8]
; CHECK-NEXT: mov.b v1[8], w8
; CHECK-NEXT: ldr w8, [sp, #136]
; CHECK-NEXT: mov.b v0[9], w9
; CHECK-NEXT: ldr w9, [sp, #16]
; CHECK-NEXT: mov.b v1[9], w8
; CHECK-NEXT: ldr w8, [sp, #144]
; CHECK-NEXT: mov.b v0[10], w9
; CHECK-NEXT: ldr w9, [sp, #24]
; CHECK-NEXT: mov.b v1[10], w8
; CHECK-NEXT: ldr w8, [sp, #152]
; CHECK-NEXT: mov.b v0[11], w9
; CHECK-NEXT: ldr w9, [sp, #32]
; CHECK-NEXT: mov.b v1[11], w8
; CHECK-NEXT: ldr w8, [sp, #160]
; CHECK-NEXT: mov.b v0[12], w9
; CHECK-NEXT: ldr w9, [sp, #40]
; CHECK-NEXT: mov.b v1[12], w8
; CHECK-NEXT: ldr w8, [sp, #168]
; CHECK-NEXT: mov.b v0[13], w9
; CHECK-NEXT: ldr w9, [sp, #48]
; CHECK-NEXT: mov.b v1[13], w8
; CHECK-NEXT: ldr w8, [sp, #176]
; CHECK-NEXT: mov.b v0[14], w9
; CHECK-NEXT: ldr w9, [sp, #56]
; CHECK-NEXT: mov.b v1[14], w8
; CHECK-NEXT: ldr w8, [sp, #184]
; CHECK-NEXT: mov.b v0[15], w9
; CHECK-NEXT: mov.b v1[15], w8
; CHECK-NEXT: and.16b v0, v0, v2
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: ret
%res = zext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
}
; sext <32 x i1> to <32 x i8>: the two 16-byte vectors are assembled lane by
; lane from w0-w7 and stack loads, as in zext_v32i1. Bit 0 of every byte is then
; moved to the sign position with shl #7 and broadcast across the byte: the
; default selector uses cmlt #0 for that, GlobalISel uses sshr #7.
define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
; CHECK-SD-LABEL: sext_v32i1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr w8, [sp, #64]
; CHECK-SD-NEXT: fmov s1, w0
; CHECK-SD-NEXT: ldr w9, [sp, #72]
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: ldr w8, [sp, #80]
; CHECK-SD-NEXT: mov.b v1[1], w1
; CHECK-SD-NEXT: mov.b v0[1], w9
; CHECK-SD-NEXT: ldr w9, [sp]
; CHECK-SD-NEXT: mov.b v1[2], w2
; CHECK-SD-NEXT: mov.b v0[2], w8
; CHECK-SD-NEXT: ldr w8, [sp, #88]
; CHECK-SD-NEXT: mov.b v1[3], w3
; CHECK-SD-NEXT: mov.b v0[3], w8
; CHECK-SD-NEXT: ldr w8, [sp, #96]
; CHECK-SD-NEXT: mov.b v1[4], w4
; CHECK-SD-NEXT: mov.b v0[4], w8
; CHECK-SD-NEXT: ldr w8, [sp, #104]
; CHECK-SD-NEXT: mov.b v1[5], w5
; CHECK-SD-NEXT: mov.b v0[5], w8
; CHECK-SD-NEXT: ldr w8, [sp, #112]
; CHECK-SD-NEXT: mov.b v1[6], w6
; CHECK-SD-NEXT: mov.b v0[6], w8
; CHECK-SD-NEXT: ldr w8, [sp, #120]
; CHECK-SD-NEXT: mov.b v1[7], w7
; CHECK-SD-NEXT: mov.b v0[7], w8
; CHECK-SD-NEXT: ldr w8, [sp, #128]
; CHECK-SD-NEXT: mov.b v1[8], w9
; CHECK-SD-NEXT: ldr w9, [sp, #8]
; CHECK-SD-NEXT: mov.b v0[8], w8
; CHECK-SD-NEXT: ldr w8, [sp, #136]
; CHECK-SD-NEXT: mov.b v1[9], w9
; CHECK-SD-NEXT: ldr w9, [sp, #16]
; CHECK-SD-NEXT: mov.b v0[9], w8
; CHECK-SD-NEXT: ldr w8, [sp, #144]
; CHECK-SD-NEXT: mov.b v1[10], w9
; CHECK-SD-NEXT: ldr w9, [sp, #24]
; CHECK-SD-NEXT: mov.b v0[10], w8
; CHECK-SD-NEXT: ldr w8, [sp, #152]
; CHECK-SD-NEXT: mov.b v1[11], w9
; CHECK-SD-NEXT: ldr w9, [sp, #32]
; CHECK-SD-NEXT: mov.b v0[11], w8
; CHECK-SD-NEXT: ldr w8, [sp, #160]
; CHECK-SD-NEXT: mov.b v1[12], w9
; CHECK-SD-NEXT: ldr w9, [sp, #40]
; CHECK-SD-NEXT: mov.b v0[12], w8
; CHECK-SD-NEXT: ldr w8, [sp, #168]
; CHECK-SD-NEXT: mov.b v1[13], w9
; CHECK-SD-NEXT: ldr w9, [sp, #48]
; CHECK-SD-NEXT: mov.b v0[13], w8
; CHECK-SD-NEXT: ldr w8, [sp, #176]
; CHECK-SD-NEXT: mov.b v1[14], w9
; CHECK-SD-NEXT: ldr w9, [sp, #56]
; CHECK-SD-NEXT: mov.b v0[14], w8
; CHECK-SD-NEXT: ldr w8, [sp, #184]
; CHECK-SD-NEXT: mov.b v1[15], w9
; CHECK-SD-NEXT: mov.b v0[15], w8
; CHECK-SD-NEXT: shl.16b v1, v1, #7
; CHECK-SD-NEXT: shl.16b v2, v0, #7
; CHECK-SD-NEXT: cmlt.16b v0, v1, #0
; CHECK-SD-NEXT: cmlt.16b v1, v2, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v32i1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w8, [sp, #64]
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: ldr w9, [sp, #72]
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: ldr w8, [sp, #80]
; CHECK-GI-NEXT: mov.b v0[1], w1
; CHECK-GI-NEXT: mov.b v1[1], w9
; CHECK-GI-NEXT: ldr w9, [sp, #128]
; CHECK-GI-NEXT: mov.b v0[2], w2
; CHECK-GI-NEXT: mov.b v1[2], w8
; CHECK-GI-NEXT: ldr w8, [sp, #88]
; CHECK-GI-NEXT: mov.b v0[3], w3
; CHECK-GI-NEXT: mov.b v1[3], w8
; CHECK-GI-NEXT: ldr w8, [sp, #96]
; CHECK-GI-NEXT: mov.b v0[4], w4
; CHECK-GI-NEXT: mov.b v1[4], w8
; CHECK-GI-NEXT: ldr w8, [sp, #104]
; CHECK-GI-NEXT: mov.b v0[5], w5
; CHECK-GI-NEXT: mov.b v1[5], w8
; CHECK-GI-NEXT: ldr w8, [sp, #112]
; CHECK-GI-NEXT: mov.b v0[6], w6
; CHECK-GI-NEXT: mov.b v1[6], w8
; CHECK-GI-NEXT: ldr w8, [sp, #120]
; CHECK-GI-NEXT: mov.b v0[7], w7
; CHECK-GI-NEXT: mov.b v1[7], w8
; CHECK-GI-NEXT: ldr w8, [sp]
; CHECK-GI-NEXT: mov.b v0[8], w8
; CHECK-GI-NEXT: ldr w8, [sp, #8]
; CHECK-GI-NEXT: mov.b v1[8], w9
; CHECK-GI-NEXT: ldr w9, [sp, #136]
; CHECK-GI-NEXT: mov.b v0[9], w8
; CHECK-GI-NEXT: ldr w8, [sp, #16]
; CHECK-GI-NEXT: mov.b v1[9], w9
; CHECK-GI-NEXT: ldr w9, [sp, #144]
; CHECK-GI-NEXT: mov.b v0[10], w8
; CHECK-GI-NEXT: ldr w8, [sp, #24]
; CHECK-GI-NEXT: mov.b v1[10], w9
; CHECK-GI-NEXT: ldr w9, [sp, #152]
; CHECK-GI-NEXT: mov.b v0[11], w8
; CHECK-GI-NEXT: ldr w8, [sp, #32]
; CHECK-GI-NEXT: mov.b v1[11], w9
; CHECK-GI-NEXT: ldr w9, [sp, #160]
; CHECK-GI-NEXT: mov.b v0[12], w8
; CHECK-GI-NEXT: ldr w8, [sp, #40]
; CHECK-GI-NEXT: mov.b v1[12], w9
; CHECK-GI-NEXT: ldr w9, [sp, #168]
; CHECK-GI-NEXT: mov.b v0[13], w8
; CHECK-GI-NEXT: ldr w8, [sp, #48]
; CHECK-GI-NEXT: mov.b v1[13], w9
; CHECK-GI-NEXT: ldr w9, [sp, #176]
; CHECK-GI-NEXT: mov.b v0[14], w8
; CHECK-GI-NEXT: ldr w8, [sp, #56]
; CHECK-GI-NEXT: mov.b v1[14], w9
; CHECK-GI-NEXT: ldr w9, [sp, #184]
; CHECK-GI-NEXT: mov.b v0[15], w8
; CHECK-GI-NEXT: mov.b v1[15], w9
; CHECK-GI-NEXT: shl.16b v0, v0, #7
; CHECK-GI-NEXT: shl.16b v1, v1, #7
; CHECK-GI-NEXT: sshr.16b v0, v0, #7
; CHECK-GI-NEXT: sshr.16b v1, v1, #7
; CHECK-GI-NEXT: ret
%res = sext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
}
; zext <64 x i1> to <64 x i8>: four 16-byte vectors (v0-v3) are assembled lane
; by lane from w0-w7 and stack loads, then each is masked with a splat of 1.
; The expected output also contains an x29 spill/reload and CFI directives
; (this function is not marked nounwind). One set of checks covers both RUN
; lines because GlobalISel falls back for this function (see the fallback
; warning checked at the top of the file).
define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
; CHECK-LABEL: zext_v64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr w8, [sp, #336]
; CHECK-NEXT: ldr w9, [sp, #208]
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ldr w10, [sp, #80]
; CHECK-NEXT: ldr w11, [sp, #216]
; CHECK-NEXT: movi.16b v4, #1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: ldr w8, [sp, #344]
; CHECK-NEXT: fmov s1, w10
; CHECK-NEXT: ldr w12, [sp, #88]
; CHECK-NEXT: mov.b v0[1], w1
; CHECK-NEXT: ldr w9, [sp, #224]
; CHECK-NEXT: ldr w10, [sp, #96]
; CHECK-NEXT: mov.b v3[1], w8
; CHECK-NEXT: mov.b v2[1], w11
; CHECK-NEXT: ldr w8, [sp, #352]
; CHECK-NEXT: mov.b v1[1], w12
; CHECK-NEXT: ldr w11, [sp, #144]
; CHECK-NEXT: mov.b v0[2], w2
; CHECK-NEXT: mov.b v3[2], w8
; CHECK-NEXT: mov.b v2[2], w9
; CHECK-NEXT: ldr w8, [sp, #360]
; CHECK-NEXT: mov.b v1[2], w10
; CHECK-NEXT: ldr w9, [sp, #232]
; CHECK-NEXT: ldr w10, [sp, #104]
; CHECK-NEXT: mov.b v0[3], w3
; CHECK-NEXT: mov.b v3[3], w8
; CHECK-NEXT: mov.b v2[3], w9
; CHECK-NEXT: ldr w8, [sp, #368]
; CHECK-NEXT: mov.b v1[3], w10
; CHECK-NEXT: ldr w9, [sp, #240]
; CHECK-NEXT: ldr w10, [sp, #112]
; CHECK-NEXT: mov.b v0[4], w4
; CHECK-NEXT: mov.b v3[4], w8
; CHECK-NEXT: mov.b v2[4], w9
; CHECK-NEXT: ldr w8, [sp, #376]
; CHECK-NEXT: mov.b v1[4], w10
; CHECK-NEXT: ldr w9, [sp, #248]
; CHECK-NEXT: ldr w10, [sp, #120]
; CHECK-NEXT: mov.b v0[5], w5
; CHECK-NEXT: mov.b v3[5], w8
; CHECK-NEXT: mov.b v2[5], w9
; CHECK-NEXT: ldr w8, [sp, #384]
; CHECK-NEXT: mov.b v1[5], w10
; CHECK-NEXT: ldr w9, [sp, #256]
; CHECK-NEXT: ldr w10, [sp, #128]
; CHECK-NEXT: mov.b v0[6], w6
; CHECK-NEXT: mov.b v3[6], w8
; CHECK-NEXT: mov.b v2[6], w9
; CHECK-NEXT: ldr w8, [sp, #392]
; CHECK-NEXT: mov.b v1[6], w10
; CHECK-NEXT: ldr w9, [sp, #264]
; CHECK-NEXT: ldr w10, [sp, #136]
; CHECK-NEXT: mov.b v0[7], w7
; CHECK-NEXT: mov.b v3[7], w8
; CHECK-NEXT: mov.b v2[7], w9
; CHECK-NEXT: ldr w8, [sp, #16]
; CHECK-NEXT: mov.b v1[7], w10
; CHECK-NEXT: ldr w9, [sp, #400]
; CHECK-NEXT: ldr w10, [sp, #272]
; CHECK-NEXT: mov.b v0[8], w8
; CHECK-NEXT: ldr w8, [sp, #24]
; CHECK-NEXT: mov.b v3[8], w9
; CHECK-NEXT: mov.b v2[8], w10
; CHECK-NEXT: ldr w9, [sp, #408]
; CHECK-NEXT: mov.b v1[8], w11
; CHECK-NEXT: ldr w10, [sp, #280]
; CHECK-NEXT: ldr w11, [sp, #152]
; CHECK-NEXT: mov.b v0[9], w8
; CHECK-NEXT: ldr w8, [sp, #32]
; CHECK-NEXT: mov.b v3[9], w9
; CHECK-NEXT: mov.b v2[9], w10
; CHECK-NEXT: ldr w9, [sp, #416]
; CHECK-NEXT: mov.b v1[9], w11
; CHECK-NEXT: ldr w10, [sp, #288]
; CHECK-NEXT: ldr w11, [sp, #160]
; CHECK-NEXT: mov.b v0[10], w8
; CHECK-NEXT: ldr w8, [sp, #40]
; CHECK-NEXT: mov.b v3[10], w9
; CHECK-NEXT: mov.b v2[10], w10
; CHECK-NEXT: ldr w9, [sp, #424]
; CHECK-NEXT: mov.b v1[10], w11
; CHECK-NEXT: ldr w10, [sp, #296]
; CHECK-NEXT: ldr w11, [sp, #168]
; CHECK-NEXT: mov.b v0[11], w8
; CHECK-NEXT: ldr w8, [sp, #48]
; CHECK-NEXT: mov.b v3[11], w9
; CHECK-NEXT: mov.b v2[11], w10
; CHECK-NEXT: ldr w9, [sp, #432]
; CHECK-NEXT: mov.b v1[11], w11
; CHECK-NEXT: ldr w10, [sp, #304]
; CHECK-NEXT: ldr w11, [sp, #176]
; CHECK-NEXT: mov.b v0[12], w8
; CHECK-NEXT: ldr w8, [sp, #56]
; CHECK-NEXT: mov.b v3[12], w9
; CHECK-NEXT: mov.b v2[12], w10
; CHECK-NEXT: ldr w9, [sp, #440]
; CHECK-NEXT: mov.b v1[12], w11
; CHECK-NEXT: ldr w10, [sp, #312]
; CHECK-NEXT: ldr w11, [sp, #184]
; CHECK-NEXT: mov.b v0[13], w8
; CHECK-NEXT: ldr w8, [sp, #64]
; CHECK-NEXT: mov.b v3[13], w9
; CHECK-NEXT: mov.b v2[13], w10
; CHECK-NEXT: ldr w9, [sp, #448]
; CHECK-NEXT: mov.b v1[13], w11
; CHECK-NEXT: ldr w10, [sp, #320]
; CHECK-NEXT: ldr w11, [sp, #192]
; CHECK-NEXT: mov.b v0[14], w8
; CHECK-NEXT: ldr w8, [sp, #72]
; CHECK-NEXT: mov.b v3[14], w9
; CHECK-NEXT: mov.b v2[14], w10
; CHECK-NEXT: ldr w9, [sp, #456]
; CHECK-NEXT: mov.b v1[14], w11
; CHECK-NEXT: ldr w10, [sp, #328]
; CHECK-NEXT: ldr w11, [sp, #200]
; CHECK-NEXT: mov.b v0[15], w8
; CHECK-NEXT: mov.b v3[15], w9
; CHECK-NEXT: mov.b v2[15], w10
; CHECK-NEXT: mov.b v1[15], w11
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: and.16b v2, v2, v4
; CHECK-NEXT: and.16b v3, v3, v4
; CHECK-NEXT: and.16b v1, v1, v4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = zext <64 x i1> %arg to <64 x i8>
ret <64 x i8> %res
}
; sext <64 x i1> to <64 x i8>: four 16-byte vectors are assembled lane by lane
; from w0-w7 and stack loads, as in zext_v64i1. Bit 0 of every byte is then
; moved to the sign position with shl #7 and broadcast across the byte: the
; default selector uses cmlt #0 for that, GlobalISel uses sshr #7. Both expected
; outputs also contain an x29 spill/reload and CFI directives (this function is
; not marked nounwind).
define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
; CHECK-SD-LABEL: sext_v64i1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: .cfi_offset w29, -16
; CHECK-SD-NEXT: ldr w8, [sp, #336]
; CHECK-SD-NEXT: ldr w9, [sp, #208]
; CHECK-SD-NEXT: fmov s2, w0
; CHECK-SD-NEXT: ldr w10, [sp, #80]
; CHECK-SD-NEXT: ldr w11, [sp, #216]
; CHECK-SD-NEXT: ldr w12, [sp, #88]
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: fmov s1, w9
; CHECK-SD-NEXT: ldr w8, [sp, #344]
; CHECK-SD-NEXT: fmov s3, w10
; CHECK-SD-NEXT: mov.b v2[1], w1
; CHECK-SD-NEXT: ldr w9, [sp, #224]
; CHECK-SD-NEXT: ldr w10, [sp, #96]
; CHECK-SD-NEXT: mov.b v0[1], w8
; CHECK-SD-NEXT: mov.b v1[1], w11
; CHECK-SD-NEXT: ldr w8, [sp, #352]
; CHECK-SD-NEXT: mov.b v3[1], w12
; CHECK-SD-NEXT: ldr w11, [sp, #144]
; CHECK-SD-NEXT: mov.b v2[2], w2
; CHECK-SD-NEXT: mov.b v0[2], w8
; CHECK-SD-NEXT: mov.b v1[2], w9
; CHECK-SD-NEXT: ldr w8, [sp, #360]
; CHECK-SD-NEXT: mov.b v3[2], w10
; CHECK-SD-NEXT: ldr w9, [sp, #232]
; CHECK-SD-NEXT: ldr w10, [sp, #104]
; CHECK-SD-NEXT: mov.b v2[3], w3
; CHECK-SD-NEXT: mov.b v0[3], w8
; CHECK-SD-NEXT: mov.b v1[3], w9
; CHECK-SD-NEXT: ldr w8, [sp, #368]
; CHECK-SD-NEXT: mov.b v3[3], w10
; CHECK-SD-NEXT: ldr w9, [sp, #240]
; CHECK-SD-NEXT: ldr w10, [sp, #112]
; CHECK-SD-NEXT: mov.b v2[4], w4
; CHECK-SD-NEXT: mov.b v0[4], w8
; CHECK-SD-NEXT: mov.b v1[4], w9
; CHECK-SD-NEXT: ldr w8, [sp, #376]
; CHECK-SD-NEXT: mov.b v3[4], w10
; CHECK-SD-NEXT: ldr w9, [sp, #248]
; CHECK-SD-NEXT: ldr w10, [sp, #120]
; CHECK-SD-NEXT: mov.b v2[5], w5
; CHECK-SD-NEXT: mov.b v0[5], w8
; CHECK-SD-NEXT: mov.b v1[5], w9
; CHECK-SD-NEXT: ldr w8, [sp, #384]
; CHECK-SD-NEXT: mov.b v3[5], w10
; CHECK-SD-NEXT: ldr w9, [sp, #256]
; CHECK-SD-NEXT: ldr w10, [sp, #128]
; CHECK-SD-NEXT: mov.b v2[6], w6
; CHECK-SD-NEXT: mov.b v0[6], w8
; CHECK-SD-NEXT: mov.b v1[6], w9
; CHECK-SD-NEXT: ldr w8, [sp, #392]
; CHECK-SD-NEXT: mov.b v3[6], w10
; CHECK-SD-NEXT: ldr w9, [sp, #264]
; CHECK-SD-NEXT: ldr w10, [sp, #136]
; CHECK-SD-NEXT: mov.b v2[7], w7
; CHECK-SD-NEXT: mov.b v0[7], w8
; CHECK-SD-NEXT: mov.b v1[7], w9
; CHECK-SD-NEXT: ldr w8, [sp, #16]
; CHECK-SD-NEXT: mov.b v3[7], w10
; CHECK-SD-NEXT: ldr w9, [sp, #400]
; CHECK-SD-NEXT: ldr w10, [sp, #272]
; CHECK-SD-NEXT: mov.b v2[8], w8
; CHECK-SD-NEXT: ldr w8, [sp, #24]
; CHECK-SD-NEXT: mov.b v0[8], w9
; CHECK-SD-NEXT: mov.b v1[8], w10
; CHECK-SD-NEXT: ldr w9, [sp, #408]
; CHECK-SD-NEXT: mov.b v3[8], w11
; CHECK-SD-NEXT: ldr w10, [sp, #280]
; CHECK-SD-NEXT: ldr w11, [sp, #152]
; CHECK-SD-NEXT: mov.b v2[9], w8
; CHECK-SD-NEXT: ldr w8, [sp, #32]
; CHECK-SD-NEXT: mov.b v0[9], w9
; CHECK-SD-NEXT: mov.b v1[9], w10
; CHECK-SD-NEXT: ldr w9, [sp, #416]
; CHECK-SD-NEXT: mov.b v3[9], w11
; CHECK-SD-NEXT: ldr w10, [sp, #288]
; CHECK-SD-NEXT: ldr w11, [sp, #160]
; CHECK-SD-NEXT: mov.b v2[10], w8
; CHECK-SD-NEXT: ldr w8, [sp, #40]
; CHECK-SD-NEXT: mov.b v0[10], w9
; CHECK-SD-NEXT: mov.b v1[10], w10
; CHECK-SD-NEXT: ldr w9, [sp, #424]
; CHECK-SD-NEXT: mov.b v3[10], w11
; CHECK-SD-NEXT: ldr w10, [sp, #296]
; CHECK-SD-NEXT: ldr w11, [sp, #168]
; CHECK-SD-NEXT: mov.b v2[11], w8
; CHECK-SD-NEXT: ldr w8, [sp, #48]
; CHECK-SD-NEXT: mov.b v0[11], w9
; CHECK-SD-NEXT: mov.b v1[11], w10
; CHECK-SD-NEXT: ldr w9, [sp, #432]
; CHECK-SD-NEXT: mov.b v3[11], w11
; CHECK-SD-NEXT: ldr w10, [sp, #304]
; CHECK-SD-NEXT: ldr w11, [sp, #176]
; CHECK-SD-NEXT: mov.b v2[12], w8
; CHECK-SD-NEXT: ldr w8, [sp, #56]
; CHECK-SD-NEXT: mov.b v0[12], w9
; CHECK-SD-NEXT: mov.b v1[12], w10
; CHECK-SD-NEXT: ldr w9, [sp, #440]
; CHECK-SD-NEXT: mov.b v3[12], w11
; CHECK-SD-NEXT: ldr w10, [sp, #312]
; CHECK-SD-NEXT: ldr w11, [sp, #184]
; CHECK-SD-NEXT: mov.b v2[13], w8
; CHECK-SD-NEXT: ldr w8, [sp, #64]
; CHECK-SD-NEXT: mov.b v0[13], w9
; CHECK-SD-NEXT: mov.b v1[13], w10
; CHECK-SD-NEXT: ldr w9, [sp, #448]
; CHECK-SD-NEXT: mov.b v3[13], w11
; CHECK-SD-NEXT: ldr w10, [sp, #320]
; CHECK-SD-NEXT: ldr w11, [sp, #192]
; CHECK-SD-NEXT: mov.b v2[14], w8
; CHECK-SD-NEXT: ldr w8, [sp, #72]
; CHECK-SD-NEXT: mov.b v0[14], w9
; CHECK-SD-NEXT: mov.b v1[14], w10
; CHECK-SD-NEXT: ldr w9, [sp, #456]
; CHECK-SD-NEXT: mov.b v3[14], w11
; CHECK-SD-NEXT: ldr w10, [sp, #328]
; CHECK-SD-NEXT: ldr w11, [sp, #200]
; CHECK-SD-NEXT: mov.b v2[15], w8
; CHECK-SD-NEXT: mov.b v0[15], w9
; CHECK-SD-NEXT: mov.b v1[15], w10
; CHECK-SD-NEXT: mov.b v3[15], w11
; CHECK-SD-NEXT: shl.16b v2, v2, #7
; CHECK-SD-NEXT: shl.16b v4, v1, #7
; CHECK-SD-NEXT: shl.16b v5, v0, #7
; CHECK-SD-NEXT: shl.16b v3, v3, #7
; CHECK-SD-NEXT: cmlt.16b v0, v2, #0
; CHECK-SD-NEXT: cmlt.16b v2, v4, #0
; CHECK-SD-NEXT: cmlt.16b v1, v3, #0
; CHECK-SD-NEXT: cmlt.16b v3, v5, #0
; CHECK-SD-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v64i1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: ldr w13, [sp, #80]
; CHECK-GI-NEXT: ldr w11, [sp, #208]
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: ldr w9, [sp, #336]
; CHECK-GI-NEXT: ldr w8, [sp, #88]
; CHECK-GI-NEXT: ldr w10, [sp, #216]
; CHECK-GI-NEXT: fmov s1, w13
; CHECK-GI-NEXT: fmov s2, w11
; CHECK-GI-NEXT: ldr w12, [sp, #344]
; CHECK-GI-NEXT: fmov s3, w9
; CHECK-GI-NEXT: mov.b v0[1], w1
; CHECK-GI-NEXT: ldr w9, [sp, #224]
; CHECK-GI-NEXT: ldr w11, [sp, #400]
; CHECK-GI-NEXT: mov.b v1[1], w8
; CHECK-GI-NEXT: mov.b v2[1], w10
; CHECK-GI-NEXT: ldr w8, [sp, #96]
; CHECK-GI-NEXT: mov.b v3[1], w12
; CHECK-GI-NEXT: ldr w10, [sp, #352]
; CHECK-GI-NEXT: mov.b v0[2], w2
; CHECK-GI-NEXT: mov.b v1[2], w8
; CHECK-GI-NEXT: mov.b v2[2], w9
; CHECK-GI-NEXT: ldr w8, [sp, #104]
; CHECK-GI-NEXT: mov.b v3[2], w10
; CHECK-GI-NEXT: ldr w9, [sp, #232]
; CHECK-GI-NEXT: ldr w10, [sp, #360]
; CHECK-GI-NEXT: mov.b v0[3], w3
; CHECK-GI-NEXT: mov.b v1[3], w8
; CHECK-GI-NEXT: mov.b v2[3], w9
; CHECK-GI-NEXT: ldr w8, [sp, #112]
; CHECK-GI-NEXT: mov.b v3[3], w10
; CHECK-GI-NEXT: ldr w9, [sp, #240]
; CHECK-GI-NEXT: ldr w10, [sp, #368]
; CHECK-GI-NEXT: mov.b v0[4], w4
; CHECK-GI-NEXT: mov.b v1[4], w8
; CHECK-GI-NEXT: mov.b v2[4], w9
; CHECK-GI-NEXT: ldr w8, [sp, #120]
; CHECK-GI-NEXT: mov.b v3[4], w10
; CHECK-GI-NEXT: ldr w9, [sp, #248]
; CHECK-GI-NEXT: ldr w10, [sp, #376]
; CHECK-GI-NEXT: mov.b v0[5], w5
; CHECK-GI-NEXT: mov.b v1[5], w8
; CHECK-GI-NEXT: mov.b v2[5], w9
; CHECK-GI-NEXT: ldr w8, [sp, #128]
; CHECK-GI-NEXT: mov.b v3[5], w10
; CHECK-GI-NEXT: ldr w9, [sp, #256]
; CHECK-GI-NEXT: ldr w10, [sp, #384]
; CHECK-GI-NEXT: mov.b v0[6], w6
; CHECK-GI-NEXT: mov.b v1[6], w8
; CHECK-GI-NEXT: mov.b v2[6], w9
; CHECK-GI-NEXT: ldr w8, [sp, #136]
; CHECK-GI-NEXT: mov.b v3[6], w10
; CHECK-GI-NEXT: ldr w9, [sp, #264]
; CHECK-GI-NEXT: ldr w10, [sp, #392]
; CHECK-GI-NEXT: mov.b v0[7], w7
; CHECK-GI-NEXT: mov.b v1[7], w8
; CHECK-GI-NEXT: mov.b v2[7], w9
; CHECK-GI-NEXT: ldr w8, [sp, #16]
; CHECK-GI-NEXT: mov.b v3[7], w10
; CHECK-GI-NEXT: ldr w9, [sp, #144]
; CHECK-GI-NEXT: ldr w10, [sp, #272]
; CHECK-GI-NEXT: mov.b v0[8], w8
; CHECK-GI-NEXT: ldr w8, [sp, #24]
; CHECK-GI-NEXT: mov.b v1[8], w9
; CHECK-GI-NEXT: mov.b v2[8], w10
; CHECK-GI-NEXT: ldr w9, [sp, #152]
; CHECK-GI-NEXT: mov.b v3[8], w11
; CHECK-GI-NEXT: ldr w10, [sp, #280]
; CHECK-GI-NEXT: ldr w11, [sp, #408]
; CHECK-GI-NEXT: mov.b v0[9], w8
; CHECK-GI-NEXT: ldr w8, [sp, #32]
; CHECK-GI-NEXT: mov.b v1[9], w9
; CHECK-GI-NEXT: mov.b v2[9], w10
; CHECK-GI-NEXT: ldr w9, [sp, #160]
; CHECK-GI-NEXT: mov.b v3[9], w11
; CHECK-GI-NEXT: ldr w10, [sp, #288]
; CHECK-GI-NEXT: ldr w11, [sp, #416]
; CHECK-GI-NEXT: mov.b v0[10], w8
; CHECK-GI-NEXT: ldr w8, [sp, #40]
; CHECK-GI-NEXT: mov.b v1[10], w9
; CHECK-GI-NEXT: mov.b v2[10], w10
; CHECK-GI-NEXT: ldr w9, [sp, #168]
; CHECK-GI-NEXT: mov.b v3[10], w11
; CHECK-GI-NEXT: ldr w10, [sp, #296]
; CHECK-GI-NEXT: ldr w11, [sp, #424]
; CHECK-GI-NEXT: mov.b v0[11], w8
; CHECK-GI-NEXT: ldr w8, [sp, #48]
; CHECK-GI-NEXT: mov.b v1[11], w9
; CHECK-GI-NEXT: mov.b v2[11], w10
; CHECK-GI-NEXT: ldr w9, [sp, #176]
; CHECK-GI-NEXT: mov.b v3[11], w11
; CHECK-GI-NEXT: ldr w10, [sp, #304]
; CHECK-GI-NEXT: ldr w11, [sp, #432]
; CHECK-GI-NEXT: mov.b v0[12], w8
; CHECK-GI-NEXT: ldr w8, [sp, #56]
; CHECK-GI-NEXT: mov.b v1[12], w9
; CHECK-GI-NEXT: mov.b v2[12], w10
; CHECK-GI-NEXT: ldr w9, [sp, #184]
; CHECK-GI-NEXT: mov.b v3[12], w11
; CHECK-GI-NEXT: ldr w10, [sp, #312]
; CHECK-GI-NEXT: ldr w11, [sp, #440]
; CHECK-GI-NEXT: mov.b v0[13], w8
; CHECK-GI-NEXT: ldr w8, [sp, #64]
; CHECK-GI-NEXT: mov.b v1[13], w9
; CHECK-GI-NEXT: mov.b v2[13], w10
; CHECK-GI-NEXT: ldr w9, [sp, #192]
; CHECK-GI-NEXT: mov.b v3[13], w11
; CHECK-GI-NEXT: ldr w10, [sp, #320]
; CHECK-GI-NEXT: ldr w11, [sp, #448]
; CHECK-GI-NEXT: mov.b v0[14], w8
; CHECK-GI-NEXT: ldr w8, [sp, #72]
; CHECK-GI-NEXT: mov.b v1[14], w9
; CHECK-GI-NEXT: mov.b v2[14], w10
; CHECK-GI-NEXT: ldr w9, [sp, #200]
; CHECK-GI-NEXT: mov.b v3[14], w11
; CHECK-GI-NEXT: ldr w10, [sp, #328]
; CHECK-GI-NEXT: ldr w11, [sp, #456]
; CHECK-GI-NEXT: mov.b v0[15], w8
; CHECK-GI-NEXT: mov.b v1[15], w9
; CHECK-GI-NEXT: mov.b v2[15], w10
; CHECK-GI-NEXT: mov.b v3[15], w11
; CHECK-GI-NEXT: shl.16b v0, v0, #7
; CHECK-GI-NEXT: shl.16b v1, v1, #7
; CHECK-GI-NEXT: shl.16b v2, v2, #7
; CHECK-GI-NEXT: shl.16b v3, v3, #7
; CHECK-GI-NEXT: sshr.16b v0, v0, #7
; CHECK-GI-NEXT: sshr.16b v1, v1, #7
; CHECK-GI-NEXT: sshr.16b v2, v2, #7
; CHECK-GI-NEXT: sshr.16b v3, v3, #7
; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
%res = sext <64 x i1> %arg to <64 x i8>
ret <64 x i8> %res
}
; X0 & X1 are the real return registers, SDAG messes with v0 too for unknown reasons.
; sext <1 x i64> to <1 x i128>: the expected code moves the input from d0 into
; x0 as the low half and produces the high half in x1 as the sign of the input
; (asr #63). GlobalISel uses two fmov instructions from d0, one of them into a
; scratch register (x8) that feeds the asr.
define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
; CHECK-SD-LABEL: sext_v1x64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: asr x1, x0, #63
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v1x64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: asr x1, x8, #63
; CHECK-GI-NEXT: ret
%res = sext <1 x i64> %arg to <1 x i128>
ret <1 x i128> %res
}