; llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
target triple = "aarch64-unknown-linux-gnu"
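; The 512- and 2048-bit runs share the VBITS_GE_512 prefixes, so each test only
; distinguishes the 256-bit configuration from "512 bits or more".
;
; As the checks below show, 64- and 128-bit vectors keep using NEON: the i1
; condition is tested with tst/csetm and the select is performed with a bif.
; For wider fixed-length vectors the condition is splatted into an SVE vector,
; compared against zero with cmpne to form a predicate, and the operands are
; loaded, selected and stored with predicated ld1/sel/st1 instructions.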
; Don't use SVE for 64-bit vectors.
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.8b, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
ret <8 x i8> %sel
}
; Don't use SVE for 128-bit vectors.
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.16b, w8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
ret <16 x i8> %sel
}
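; In the SVE cases the fixed-width predicate (e.g. ptrue p1.b, vl32) limits the
; loads, the sel and the store to exactly the requested number of lanes,
; independent of the actual hardware vector length.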
define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ptrue p1.b, vl32
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x i8>, ptr %a
%op2 = load volatile <32 x i8>, ptr %b
%sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
store <32 x i8> %sel, ptr %a
ret void
}
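; select_v64i8 carries no vscale_range attribute, so the 256-bit configuration
; splits the 64-byte operation into two 32-byte halves addressed via x8
; (mov w8, #32), while the 512-bit configuration covers it with a single vl64
; predicate.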
define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov z0.b, w2
; VBITS_GE_256-NEXT: ptrue p0.b
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1b { z2.b }, p1/z, [x1, x8]
; VBITS_GE_256-NEXT: ld1b { z3.b }, p1/z, [x1]
; VBITS_GE_256-NEXT: sel z0.b, p0, z0.b, z2.b
; VBITS_GE_256-NEXT: sel z1.b, p0, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z0.b }, p1, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v64i8:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: mov z0.b, w2
; VBITS_GE_512-NEXT: ptrue p0.b
; VBITS_GE_512-NEXT: ptrue p1.b, vl64
; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z0.b, #0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1b { z1.b }, p1/z, [x1]
; VBITS_GE_512-NEXT: sel z0.b, p0, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <64 x i8>, ptr %a
%op2 = load volatile <64 x i8>, ptr %b
%sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
store <64 x i8> %sel, ptr %a
ret void
}
define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ptrue p1.b, vl128
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <128 x i8>, ptr %a
%op2 = load volatile <128 x i8>, ptr %b
%sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
store <128 x i8> %sel, ptr %a
ret void
}
define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ptrue p1.b, vl256
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <256 x i8>, ptr %a
%op2 = load volatile <256 x i8>, ptr %b
%sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
store <256 x i8> %sel, ptr %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.4h, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
ret <4 x i16> %sel
}
; Don't use SVE for 128-bit vectors.
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
ret <8 x i16> %sel
}
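; For the i16 element cases the splatted condition is additionally masked with
; "and z0.h, z0.h, #0x1" so that only bit 0 of the condition feeds the cmpne.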
define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, w2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl16
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <16 x i16>, ptr %a
%op2 = load volatile <16 x i16>, ptr %b
%sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
store <16 x i16> %sel, ptr %a
ret void
}
define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov z0.h, w2
; VBITS_GE_256-NEXT: ptrue p0.h
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ptrue p1.h, vl16
; VBITS_GE_256-NEXT: and z0.h, z0.h, #0x1
; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p1/z, [x1]
; VBITS_GE_256-NEXT: sel z0.h, p0, z0.h, z2.h
; VBITS_GE_256-NEXT: sel z1.h, p0, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p1, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: mov z0.h, w2
; VBITS_GE_512-NEXT: ptrue p0.h
; VBITS_GE_512-NEXT: ptrue p1.h, vl32
; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x1
; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p1/z, [x1]
; VBITS_GE_512-NEXT: sel z0.h, p0, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <32 x i16>, ptr %a
%op2 = load volatile <32 x i16>, ptr %b
%sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
store <32 x i16> %sel, ptr %a
ret void
}
define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, w2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl64
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <64 x i16>, ptr %a
%op2 = load volatile <64 x i16>, ptr %b
%sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
store <64 x i16> %sel, ptr %a
ret void
}
define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, w2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl128
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <128 x i16>, ptr %a
%op2 = load volatile <128 x i16>, ptr %b
%sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
store <128 x i16> %sel, ptr %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
ret <2 x i32> %sel
}
; Don't use SVE for 128-bit vectors.
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
ret <4 x i32> %sel
}
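; For i32 elements the bit-0 masking happens on the scalar side
; ("and w8, w2, #0x1") before the value is splatted with "mov z0.s, w8".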
define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl8
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <8 x i32>, ptr %a
%op2 = load volatile <8 x i32>, ptr %b
%sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
store <8 x i32> %sel, ptr %a
ret void
}
define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: and w8, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: mov z0.s, w8
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p1/z, [x1]
; VBITS_GE_256-NEXT: sel z0.s, p0, z0.s, z2.s
; VBITS_GE_256-NEXT: sel z1.s, p0, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: and w8, w2, #0x1
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: mov z0.s, w8
; VBITS_GE_512-NEXT: ptrue p1.s, vl16
; VBITS_GE_512-NEXT: cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p1/z, [x1]
; VBITS_GE_512-NEXT: sel z0.s, p0, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <16 x i32>, ptr %a
%op2 = load volatile <16 x i32>, ptr %b
%sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
store <16 x i32> %sel, ptr %a
ret void
}
define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl32
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x i32>, ptr %a
%op2 = load volatile <32 x i32>, ptr %b
%sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
store <32 x i32> %sel, ptr %a
ret void
}
define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl64
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <64 x i32>, ptr %a
%op2 = load volatile <64 x i32>, ptr %b
%sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
store <64 x i32> %sel, ptr %a
ret void
}
; Don't use SVE for 64-bit vectors.
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
ret <1 x i64> %sel
}
; Don't use SVE for 128-bit vectors.
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: dup v2.2d, x8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
ret <2 x i64> %sel
}
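; For i64 elements the i1 argument arriving in w2 is read as the full x2
; register (the "// kill:" note in the checks marks this widening) and masked
; with "and x8, x2, #0x1" before the splat.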
define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: and x8, x2, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl4
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <4 x i64>, ptr %a
%op2 = load volatile <4 x i64>, ptr %b
%sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
store <4 x i64> %sel, ptr %a
ret void
}
define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: // kill: def $w2 killed $w2 def $x2
; VBITS_GE_256-NEXT: and x8, x2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d
; VBITS_GE_256-NEXT: mov z0.d, x8
; VBITS_GE_256-NEXT: ptrue p1.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
; VBITS_GE_256-NEXT: sel z0.d, p0, z0.d, z2.d
; VBITS_GE_256-NEXT: sel z1.d, p0, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: // kill: def $w2 killed $w2 def $x2
; VBITS_GE_512-NEXT: and x8, x2, #0x1
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: mov z0.d, x8
; VBITS_GE_512-NEXT: ptrue p1.d, vl8
; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1]
; VBITS_GE_512-NEXT: sel z0.d, p0, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <8 x i64>, ptr %a
%op2 = load volatile <8 x i64>, ptr %b
%sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
store <8 x i64> %sel, ptr %a
ret void
}
define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: and x8, x2, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl16
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <16 x i64>, ptr %a
%op2 = load volatile <16 x i64>, ptr %b
%sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
store <16 x i64> %sel, ptr %a
ret void
}
define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: and x8, x2, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x i64>, ptr %a
%op2 = load volatile <32 x i64>, ptr %b
%sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
store <32 x i64> %sel, ptr %a
ret void
}
attributes #0 = { "target-features"="+sve" }