; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
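; The three RUN lines compile the same IR with different guaranteed SVE
; register widths. The 2048-bit run reuses the VBITS_GE_512 check lines
; because, for the functions tested here, the generated code no longer
; changes once each operand fits in a single register. To reproduce one
; configuration by hand, an invocation along these lines should work
; (sketch; it only uses flags already present in the RUN lines above):
;   llc -aarch64-sve-vector-bits-min=512 sve-fixed-length-log-reduce.ll -o -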
target triple = "aarch64-unknown-linux-gnu"
;
; ANDV
;
; No single-instruction NEON ANDV support. Use SVE.
define i8 @andv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
ret i8 %res
}
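; For reference, @llvm.vector.reduce.and is semantically just a tree of
; scalar ANDs over the lanes. The following hand-expanded equivalent of
; the v8i8 case is an illustrative sketch only: the function name is made
; up and it is not covered by the autogenerated checks in this file.
define i8 @andv_v8i8_hand_expanded(<8 x i8> %a) {
  ; Extract every lane, then AND-fold them pairwise.
  %e0 = extractelement <8 x i8> %a, i64 0
  %e1 = extractelement <8 x i8> %a, i64 1
  %e2 = extractelement <8 x i8> %a, i64 2
  %e3 = extractelement <8 x i8> %a, i64 3
  %e4 = extractelement <8 x i8> %a, i64 4
  %e5 = extractelement <8 x i8> %a, i64 5
  %e6 = extractelement <8 x i8> %a, i64 6
  %e7 = extractelement <8 x i8> %a, i64 7
  %a01 = and i8 %e0, %e1
  %a23 = and i8 %e2, %e3
  %a45 = and i8 %e4, %e5
  %a67 = and i8 %e6, %e7
  %lo  = and i8 %a01, %a23
  %hi  = and i8 %a45, %a67
  %res = and i8 %lo, %hi
  ret i8 %res
}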
; No single-instruction NEON ANDV support. Use SVE.
define i8 @andv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
ret i8 %res
}
define i8 @andv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)
ret i8 %res
}
define i8 @andv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv b0, p0, z0.b
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: andv_v64i8:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: andv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %op)
ret i8 %res
}
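; At VBITS_GE_256 the 64-byte operand spans two 256-bit registers, so the
; backend loads both halves, combines them with one bitwise AND, and then
; needs only a single ANDV on the 32-byte result. A hand-split equivalent
; in IR would look roughly like this (illustrative sketch; the function
; name is made up and is not covered by the checks above):
define i8 @andv_v64i8_hand_split(ptr %a) #0 {
  %hi.addr = getelementptr inbounds i8, ptr %a, i64 32
  %lo = load <32 x i8>, ptr %a
  %hi = load <32 x i8>, ptr %hi.addr
  ; Folding the halves first keeps the reduction to one register's worth.
  %both = and <32 x i8> %lo, %hi
  %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %both)
  ret i8 %res
}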
define i8 @andv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %op)
ret i8 %res
}
define i8 @andv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v256i8(<256 x i8> %op)
ret i8 %res
}
; No single-instruction NEON ANDV support. Use SVE.
define i16 @andv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
ret i16 %res
}
; No single-instruction NEON ANDV support. Use SVE.
define i16 @andv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
ret i16 %res
}
define i16 @andv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)
ret i16 %res
}
define i16 @andv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv h0, p0, z0.h
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: andv_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: andv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %op)
ret i16 %res
}
define i16 @andv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %op)
ret i16 %res
}
define i16 @andv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> %op)
ret i16 %res
}
; No single-instruction NEON ANDV support. Use SVE.
define i32 @andv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
ret i32 %res
}
; No single-instruction NEON ANDV support. Use SVE.
define i32 @andv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
ret i32 %res
}
define i32 @andv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)
ret i32 %res
}
define i32 @andv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv s0, p0, z0.s
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: andv_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: andv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %op)
ret i32 %res
}
define i32 @andv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %op)
ret i32 %res
}
define i32 @andv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> %op)
ret i32 %res
}
; Nothing to do for single-element vectors.
define i64 @andv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
ret i64 %res
}
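; A single-element reduction is just an extract of lane 0, which is why
; the code above is only a register move. A hand-written equivalent, as
; an illustrative sketch (the function name is made up):
define i64 @andv_v1i64_hand_expanded(<1 x i64> %a) {
  %only = extractelement <1 x i64> %a, i64 0
  ret i64 %only
}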
; Use SVE for 128-bit vectors.
define i64 @andv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
ret i64 %res
}
define i64 @andv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)
ret i64 %res
}
define i64 @andv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv d0, p0, z0.d
; VBITS_GE_256-NEXT: fmov x0, d0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: andv_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: andv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
%op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %op)
ret i64 %res
}
define i64 @andv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %op)
ret i64 %res
}
define i64 @andv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> %op)
ret i64 %res
}
;
; EORV
;
; No single-instruction NEON EORV support. Use SVE.
define i8 @eorv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
ret i8 %res
}
; No single-instruction NEON EORV support. Use SVE.
define i8 @eorv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
ret i8 %res
}
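; One common use of the xor reduction: xor-folding a vector whose lanes
; are all 0 or 1 yields the number of set lanes modulo 2, i.e. the mask's
; parity. Usage sketch (illustrative only; the function name is made up):
define i8 @mask_parity_v16i8(<16 x i8> %mask01) {
  ; Assumes every lane of %mask01 is 0 or 1.
  %parity = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %mask01)
  ret i8 %parity
}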
define i8 @eorv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)
ret i8 %res
}
define i8 @eorv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv b0, p0, z0.b
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: eorv_v64i8:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: eorv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %op)
ret i8 %res
}
define i8 @eorv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %op)
ret i8 %res
}
define i8 @eorv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v256i8(<256 x i8> %op)
ret i8 %res
}
; No single-instruction NEON EORV support. Use SVE.
define i16 @eorv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)
ret i16 %res
}
; No single-instruction NEON EORV support. Use SVE.
define i16 @eorv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)
ret i16 %res
}
define i16 @eorv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)
ret i16 %res
}
define i16 @eorv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv h0, p0, z0.h
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: eorv_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: eorv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %op)
ret i16 %res
}
define i16 @eorv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %op)
ret i16 %res
}
define i16 @eorv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> %op)
ret i16 %res
}
; No single-instruction NEON EORV support. Use SVE.
define i32 @eorv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
ret i32 %res
}
; No single-instruction NEON EORV support. Use SVE.
define i32 @eorv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
ret i32 %res
}
define i32 @eorv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)
ret i32 %res
}
define i32 @eorv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv s0, p0, z0.s
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: eorv_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: eorv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %op)
ret i32 %res
}
define i32 @eorv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %op)
ret i32 %res
}
define i32 @eorv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> %op)
ret i32 %res
}
; Nothing to do for single-element vectors.
define i64 @eorv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %a)
ret i64 %res
}
; Use SVE for 128-bit vectors.
define i64 @eorv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)
ret i64 %res
}
define i64 @eorv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)
ret i64 %res
}
define i64 @eorv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv d0, p0, z0.d
; VBITS_GE_256-NEXT: fmov x0, d0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: eorv_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: eorv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
%op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %op)
ret i64 %res
}
define i64 @eorv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %op)
ret i64 %res
}
define i64 @eorv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> %op)
ret i64 %res
}
;
; ORV
;
; No single-instruction NEON ORV support. Use SVE.
define i8 @orv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
ret i8 %res
}
; No single-instruction NEON ORV support. Use SVE.
define i8 @orv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
ret i8 %res
}
define i8 @orv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)
ret i8 %res
}
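; One common use of the or reduction: testing whether any lane of a
; loaded vector is non-zero, by or-folding the lanes and comparing the
; result with zero. Usage sketch (illustrative only; names are made up):
define i1 @any_lane_set_v32i8(ptr %a) {
  %v = load <32 x i8>, ptr %a
  %folded = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %v)
  %any = icmp ne i8 %folded, 0
  ret i1 %any
}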
define i8 @orv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv b0, p0, z0.b
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: orv_v64i8:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: orv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %op)
ret i8 %res
}
define i8 @orv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %op)
ret i8 %res
}
define i8 @orv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v256i8(<256 x i8> %op)
ret i8 %res
}
; No single-instruction NEON ORV support. Use SVE.
define i16 @orv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)
ret i16 %res
}
; No single-instruction NEON ORV support. Use SVE.
define i16 @orv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)
ret i16 %res
}
define i16 @orv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)
ret i16 %res
}
define i16 @orv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv h0, p0, z0.h
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: orv_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: orv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %op)
ret i16 %res
}
define i16 @orv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %op)
ret i16 %res
}
define i16 @orv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> %op)
ret i16 %res
}
; No single-instruction NEON ORV support. Use SVE.
define i32 @orv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
ret i32 %res
}
; No single-instruction NEON ORV support. Use SVE.
define i32 @orv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
ret i32 %res
}
define i32 @orv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op)
ret i32 %res
}
define i32 @orv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv s0, p0, z0.s
; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: orv_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: orv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
%op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %op)
ret i32 %res
}
define i32 @orv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %op)
ret i32 %res
}
define i32 @orv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> %op)
ret i32 %res
}
; Nothing to do for single-element vectors.
define i64 @orv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %a)
ret i64 %res
}
; Use SVE for 128-bit vectors.
define i64 @orv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
ret i64 %res
}
define i64 @orv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op)
ret i64 %res
}
define i64 @orv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv d0, p0, z0.d
; VBITS_GE_256-NEXT: fmov x0, d0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: orv_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: orv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
%op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %op)
ret i64 %res
}
define i64 @orv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %op)
ret i64 %res
}
define i64 @orv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> %op)
ret i64 %res
}
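; Attribute group #0 below attaches the +sve target feature to every
; function. The per-function vscale_range(N,0) attributes additionally
; promise a minimum vscale of N, i.e. SVE registers of at least N*128
; bits; that lower bound is what lets the compiler assume each
; fixed-length vector fits and select the exact ptrue vector-length
; operands (vl8, vl32, ...) seen in the checks.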
attributes #0 = { "target-features"="+sve" }
declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)
declare i8 @llvm.vector.reduce.and.v256i8(<256 x i8>)
declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)
declare i16 @llvm.vector.reduce.and.v128i16(<128 x i16>)
declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)
declare i32 @llvm.vector.reduce.and.v64i32(<64 x i32>)
declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
declare i64 @llvm.vector.reduce.and.v32i64(<32 x i64>)
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
declare i8 @llvm.vector.reduce.or.v256i8(<256 x i8>)
declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
declare i16 @llvm.vector.reduce.or.v128i16(<128 x i16>)
declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
declare i32 @llvm.vector.reduce.or.v64i32(<64 x i32>)
declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
declare i64 @llvm.vector.reduce.or.v32i64(<32 x i64>)
declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)
declare i8 @llvm.vector.reduce.xor.v256i8(<256 x i8>)
declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)
declare i16 @llvm.vector.reduce.xor.v128i16(<128 x i16>)
declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)
declare i32 @llvm.vector.reduce.xor.v64i32(<64 x i32>)
declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)
declare i64 @llvm.vector.reduce.xor.v32i64(<32 x i64>)