; NOTE: This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues
; were discovered with the bootstrap builds, which seem like they were caused by
; that commit; reverting to investigate.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8b, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
  ret <8 x i8> %sel
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.16b, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
  ret <16 x i8> %sel
}
define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ptrue p1.b, vl32
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i8>, ptr %a
  %op2 = load volatile <32 x i8>, ptr %b
  %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
  store <32 x i8> %sel, ptr %a
  ret void
}
define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.b, w2
; VBITS_GE_256-NEXT:    ptrue p0.b
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ptrue p1.b, vl32
; VBITS_GE_256-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p1/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p1/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.b, p0, z0.b, z2.b
; VBITS_GE_256-NEXT:    sel z1.b, p0, z1.b, z3.b
; VBITS_GE_256-NEXT:    st1b { z0.b }, p1, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.b, w2
; VBITS_GE_512-NEXT:    ptrue p0.b
; VBITS_GE_512-NEXT:    ptrue p1.b, vl64
; VBITS_GE_512-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.b, p0, z0.b, z1.b
; VBITS_GE_512-NEXT:    st1b { z0.b }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <64 x i8>, ptr %a
  %op2 = load volatile <64 x i8>, ptr %b
  %sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
  store <64 x i8> %sel, ptr %a
  ret void
}
define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ptrue p1.b, vl128
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x i8>, ptr %a
  %op2 = load volatile <128 x i8>, ptr %b
  %sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
  store <128 x i8> %sel, ptr %a
  ret void
}
define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ptrue p1.b, vl256
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <256 x i8>, ptr %a
  %op2 = load volatile <256 x i8>, ptr %b
  %sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
  store <256 x i8> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4h, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
  ret <4 x i16> %sel
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8h, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
  ret <8 x i16> %sel
}
define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl16
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x i16>, ptr %a
  %op2 = load volatile <16 x i16>, ptr %b
  %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
  store <16 x i16> %sel, ptr %a
  ret void
}
define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.h, w2
; VBITS_GE_256-NEXT:    ptrue p0.h
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ptrue p1.h, vl16
; VBITS_GE_256-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_256-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.h, p0, z0.h, z2.h
; VBITS_GE_256-NEXT:    sel z1.h, p0, z1.h, z3.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p1, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.h, w2
; VBITS_GE_512-NEXT:    ptrue p0.h
; VBITS_GE_512-NEXT:    ptrue p1.h, vl32
; VBITS_GE_512-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_512-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.h, p0, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <32 x i16>, ptr %a
  %op2 = load volatile <32 x i16>, ptr %b
  %sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
  store <32 x i16> %sel, ptr %a
  ret void
}
define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl64
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x i16>, ptr %a
  %op2 = load volatile <64 x i16>, ptr %b
  %sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
  store <64 x i16> %sel, ptr %a
  ret void
}
define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl128
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x i16>, ptr %a
  %op2 = load volatile <128 x i16>, ptr %b
  %sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
  store <128 x i16> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.2s, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
  ret <2 x i32> %sel
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
  ret <4 x i32> %sel
}
define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl8
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <8 x i32>, ptr %a
  %op2 = load volatile <8 x i32>, ptr %b
  %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
  store <8 x i32> %sel, ptr %a
  ret void
}
define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    and w8, w2, #0x1
; VBITS_GE_256-NEXT:    ptrue p0.s
; VBITS_GE_256-NEXT:    mov z0.s, w8
; VBITS_GE_256-NEXT:    ptrue p1.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.s, p0, z0.s, z2.s
; VBITS_GE_256-NEXT:    sel z1.s, p0, z1.s, z3.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p1, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    and w8, w2, #0x1
; VBITS_GE_512-NEXT:    ptrue p0.s
; VBITS_GE_512-NEXT:    mov z0.s, w8
; VBITS_GE_512-NEXT:    ptrue p1.s, vl16
; VBITS_GE_512-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.s, p0, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <16 x i32>, ptr %a
  %op2 = load volatile <16 x i32>, ptr %b
  %sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
  store <16 x i32> %sel, ptr %a
  ret void
}
define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i32>, ptr %a
  %op2 = load volatile <32 x i32>, ptr %b
  %sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
  store <32 x i32> %sel, ptr %a
  ret void
}
define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl64
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x i32>, ptr %a
  %op2 = load volatile <64 x i32>, ptr %b
  %sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
  store <64 x i32> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
  ret <1 x i64> %sel
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    dup v2.2d, x8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
  ret <2 x i64> %sel
}
define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl4
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <4 x i64>, ptr %a
  %op2 = load volatile <4 x i64>, ptr %b
  %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
  store <4 x i64> %sel, ptr %a
  ret void
}
define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_256-NEXT:    and x8, x2, #0x1
; VBITS_GE_256-NEXT:    ptrue p0.d
; VBITS_GE_256-NEXT:    mov z0.d, x8
; VBITS_GE_256-NEXT:    ptrue p1.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.d, p0, z0.d, z2.d
; VBITS_GE_256-NEXT:    sel z1.d, p0, z1.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p1, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_512-NEXT:    and x8, x2, #0x1
; VBITS_GE_512-NEXT:    ptrue p0.d
; VBITS_GE_512-NEXT:    mov z0.d, x8
; VBITS_GE_512-NEXT:    ptrue p1.d, vl8
; VBITS_GE_512-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.d, p0, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <8 x i64>, ptr %a
  %op2 = load volatile <8 x i64>, ptr %b
  %sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
  store <8 x i64> %sel, ptr %a
  ret void
}
define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl16
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x i64>, ptr %a
  %op2 = load volatile <16 x i64>, ptr %b
  %sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
  store <16 x i64> %sel, ptr %a
  ret void
}
define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i64>, ptr %a
  %op2 = load volatile <32 x i64>, ptr %b
  %sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
  store <32 x i64> %sel, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }