
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues were discovered with the bootstrap builds, which seem like they were caused by this commit. I'm reverting to investigate.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
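
; The three RUN lines compile the same IR at different minimum SVE register
; widths. Functions without a vscale_range attribute lower differently in the
; 256-bit and 512-bit runs (VBITS_GE_256 vs. VBITS_GE_512); the 2048-bit run
; shares the VBITS_GE_512 prefixes because, for the functions exercised here,
; its output is identical once 512-bit vectors are available.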

target triple = "aarch64-unknown-linux-gnu"

;
; FADDA
;
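
; llvm.vector.reduce.fadd calls without reassociation flags must accumulate
; strictly in element order, so they lower to the SVE FADDA instruction,
; which folds each active element into the scalar accumulator sequentially.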

; No single instruction NEON support. Use SVE.
define half @fadda_v4f16(half %start, <4 x half> %a) vscale_range(1,0) #0 {
; CHECK-LABEL: fadda_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
  ret half %res
}

; No single instruction NEON support. Use SVE.
define half @fadda_v8f16(half %start, <8 x half> %a) vscale_range(1,0) #0 {
; CHECK-LABEL: fadda_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
  ret half %res
}

define half @fadda_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fadda_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
  ret half %res
}
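
; With a 256-bit minimum register size, operations on 512-bit fixed vectors
; are split in two: each half is loaded under a vl16/vl8/vl4 predicate and
; folded into the running FADDA total, low half first, preserving the strict
; evaluation order.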
define half @fadda_v32f16(half %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: fadda_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fadda h0, p0, h0, z1.h
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: fadda h0, p0, h0, z1.h
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fadda_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 def $z0
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fadda h0, p0, h0, z1.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
  ret half %res
}

define half @fadda_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fadda_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
  ret half %res
}

define half @fadda_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fadda_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
  ret half %res
}

; No single instruction NEON support. Use SVE.
define float @fadda_v2f32(float %start, <2 x float> %a) vscale_range(1,0) #0 {
; CHECK-LABEL: fadda_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
  ret float %res
}

; No single instruction NEON support. Use SVE.
define float @fadda_v4f32(float %start, <4 x float> %a) vscale_range(1,0) #0 {
; CHECK-LABEL: fadda_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
  ret float %res
}

define float @fadda_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fadda_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
  ret float %res
}

define float @fadda_v16f32(float %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: fadda_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fadda s0, p0, s0, z1.s
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: fadda s0, p0, s0, z1.s
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fadda_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 def $z0
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fadda s0, p0, s0, z1.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
  ret float %res
}

define float @fadda_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fadda_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
  ret float %res
}

define float @fadda_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fadda_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
  ret float %res
}

; No single instruction NEON support. Use SVE.
define double @fadda_v1f64(double %start, <1 x double> %a) vscale_range(1,0) #0 {
; CHECK-LABEL: fadda_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
  ret double %res
}

; No single instruction NEON support. Use SVE.
define double @fadda_v2f64(double %start, <2 x double> %a) vscale_range(1,0) #0 {
; CHECK-LABEL: fadda_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
  ret double %res
}

define double @fadda_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fadda_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
  ret double %res
}

define double @fadda_v8f64(double %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: fadda_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fadda d0, p0, d0, z1.d
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: fadda d0, p0, d0, z1.d
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fadda_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 def $z0
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fadda d0, p0, d0, z1.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
  ret double %res
}

define double @fadda_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fadda_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
  ret double %res
}

define double @fadda_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fadda_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
  ret double %res
}

;
; FADDV
;
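
; These variants carry the 'fast' flag, so the reduction may be reassociated:
; the elements are combined with an unordered FADDV (tree) reduction and the
; start value is added to the result with a single scalar FADD afterwards.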

; No single instruction NEON support for 4 element vectors.
define half @faddv_v4f16(half %start, <4 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
  %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
  ret half %res
}

; No single instruction NEON support for 8 element vectors.
define half @faddv_v8f16(half %start, <8 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
  %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
  ret half %res
}

define half @faddv_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
  ret half %res
}

define half @faddv_v32f16(half %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: faddv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fadd z1.h, p0/m, z1.h, z2.h
; VBITS_GE_256-NEXT: faddv h1, p0, z1.h
; VBITS_GE_256-NEXT: fadd h0, h0, h1
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: faddv_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: faddv h1, p0, z1.h
; VBITS_GE_512-NEXT: fadd h0, h0, h1
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call fast half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
  ret half %res
}

define half @faddv_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: faddv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call fast half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
  ret half %res
}

define half @faddv_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: faddv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call fast half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
  ret half %res
}

; Don't use SVE for 2 element vectors.
define float @faddv_v2f32(float %start, <2 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: faddp s1, v1.2s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
  ret float %res
}

; No single instruction NEON support for 4 element vectors.
define float @faddv_v4f32(float %start, <4 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
  ret float %res
}

define float @faddv_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
  ret float %res
}

define float @faddv_v16f32(float %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: faddv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fadd z1.s, p0/m, z1.s, z2.s
; VBITS_GE_256-NEXT: faddv s1, p0, z1.s
; VBITS_GE_256-NEXT: fadd s0, s0, s1
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: faddv_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: faddv s1, p0, z1.s
; VBITS_GE_512-NEXT: fadd s0, s0, s1
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call fast float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
  ret float %res
}

define float @faddv_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: faddv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call fast float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
  ret float %res
}

define float @faddv_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: faddv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call fast float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
  ret float %res
}

; Don't use SVE for 1 element vectors.
define double @faddv_v1f64(double %start, <1 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
  %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
  ret double %res
}

; Don't use SVE for 2 element vectors.
define double @faddv_v2f64(double %start, <2 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: faddp d1, v1.2d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
  %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
  ret double %res
}

define double @faddv_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
  ret double %res
}

define double @faddv_v8f64(double %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: faddv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fadd z1.d, p0/m, z1.d, z2.d
; VBITS_GE_256-NEXT: faddv d1, p0, z1.d
; VBITS_GE_256-NEXT: fadd d0, d0, d1
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: faddv_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: faddv d1, p0, z1.d
; VBITS_GE_512-NEXT: fadd d0, d0, d1
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call fast double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
  ret double %res
}

define double @faddv_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: faddv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call fast double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
  ret double %res
}

define double @faddv_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: faddv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call fast double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
  ret double %res
}

;
; FMAXNMV
;
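
; llvm.vector.reduce.fmax has IEEE maxNum semantics (a quiet NaN operand is
; ignored in favour of the number), which maps onto the "NM" reduction forms:
; NEON FMAXNMV/FMAXNMP for small vectors and SVE FMAXNMV elsewhere.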

; NEON FMAXNMV is available for 16-bit vectors here because +sve implies +fullfp16.
define half @fmaxv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnmv h0, v0.4h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
  ret half %res
}

; NEON FMAXNMV is available for 16-bit vectors here because +sve implies +fullfp16.
define half @fmaxv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnmv h0, v0.8h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
  ret half %res
}

define half @fmaxv_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
  ret half %res
}

define half @fmaxv_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaxv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: fmaxnmv h0, p0, z0.h
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaxv_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fmaxnmv h0, p0, z0.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op)
  ret half %res
}

define half @fmaxv_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op)
  ret half %res
}

define half @fmaxv_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op)
  ret half %res
}

; Don't use SVE for 64-bit f32 vectors.
define float @fmaxv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnmp s0, v0.2s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
  ret float %res
}

; Don't use SVE for 128-bit f32 vectors.
define float @fmaxv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
  ret float %res
}

define float @fmaxv_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
  ret float %res
}

define float @fmaxv_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaxv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: fmaxnmv s0, p0, z0.s
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaxv_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fmaxnmv s0, p0, z0.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op)
  ret float %res
}

define float @fmaxv_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op)
  ret float %res
}

define float @fmaxv_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op)
  ret float %res
}

; Nothing to do for single element vectors.
define double @fmaxv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
  ret double %res
}

; Don't use SVE for 128-bit f64 vectors.
define double @fmaxv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnmp d0, v0.2d
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
  ret double %res
}

define double @fmaxv_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
  ret double %res
}

define double @fmaxv_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaxv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: fmaxnmv d0, p0, z0.d
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaxv_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fmaxnmv d0, p0, z0.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op)
  ret double %res
}

define double @fmaxv_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op)
  ret double %res
}

define double @fmaxv_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %op)
  ret double %res
}

;
; FMINNMV
;
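
; llvm.vector.reduce.fmin mirrors the fmax cases above with IEEE minNum
; semantics, lowering to the FMINNM family of reductions.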

; NEON FMINNMV is available for 16-bit vectors here because +sve implies +fullfp16.
define half @fminv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnmv h0, v0.4h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
  ret half %res
}

; NEON FMINNMV is available for 16-bit vectors here because +sve implies +fullfp16.
define half @fminv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnmv h0, v0.8h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
  ret half %res
}

define half @fminv_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
  ret half %res
}

define half @fminv_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: fminnmv h0, p0, z0.h
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminv_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fminnmv h0, p0, z0.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op)
  ret half %res
}

define half @fminv_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %op)
  ret half %res
}

define half @fminv_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %op)
  ret half %res
}

; Don't use SVE for 64-bit f32 vectors.
define float @fminv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnmp s0, v0.2s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
  ret float %res
}

; Don't use SVE for 128-bit f32 vectors.
define float @fminv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnmv s0, v0.4s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
  ret float %res
}

define float @fminv_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
  ret float %res
}

define float @fminv_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: fminnmv s0, p0, z0.s
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminv_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fminnmv s0, p0, z0.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op)
  ret float %res
}

define float @fminv_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %op)
  ret float %res
}

define float @fminv_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmin.v64f32(<64 x float> %op)
  ret float %res
}

; Nothing to do for single element vectors.
define double @fminv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
  ret double %res
}

; Don't use SVE for 128-bit f64 vectors.
define double @fminv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnmp d0, v0.2d
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
  ret double %res
}

define double @fminv_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
  ret double %res
}

define double @fminv_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: fminnmv d0, p0, z0.d
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminv_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fminnmv d0, p0, z0.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op)
  ret double %res
}

define double @fminv_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %op)
  ret double %res
}

define double @fminv_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %op)
  ret double %res
}

;
; FMAXV
;
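
; llvm.vector.reduce.fmaximum propagates NaNs (IEEE maximum), so it lowers to
; the plain FMAX reductions (NEON FMAXV/FMAXP, SVE FMAXV) rather than the
; "NM" forms used for llvm.vector.reduce.fmax above.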

define half @fmaximumv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxv h0, v0.4h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
  ret half %res
}

define half @fmaximumv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxv h0, v0.8h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a)
  ret half %res
}

define half @fmaximumv_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fmaxv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
  ret half %res
}

define half @fmaximumv_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaximumv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: fmaxv h0, p0, z0.h
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaximumv_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fmaxv h0, p0, z0.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmaximum.v32f16(<32 x half> %op)
  ret half %res
}

define half @fmaximumv_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaximumv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fmaxv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmaximum.v64f16(<64 x half> %op)
  ret half %res
}

define half @fmaximumv_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaximumv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fmaxv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fmaximum.v128f16(<128 x half> %op)
  ret half %res
}

; Don't use SVE for 64-bit f32 vectors.
define float @fmaximumv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxp s0, v0.2s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a)
  ret float %res
}

; Don't use SVE for 128-bit f32 vectors.
define float @fmaximumv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxv s0, v0.4s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
  ret float %res
}

define float @fmaximumv_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmaxv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
  ret float %res
}

define float @fmaximumv_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaximumv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: fmaxv s0, p0, z0.s
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaximumv_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fmaxv s0, p0, z0.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %op)
  ret float %res
}

define float @fmaximumv_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaximumv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmaxv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %op)
  ret float %res
}

define float @fmaximumv_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaximumv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fmaxv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %op)
  ret float %res
}

; Nothing to do for single element vectors.
define double @fmaximumv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
  ret double %res
}

; Don't use SVE for 128-bit f64 vectors.
define double @fmaximumv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxp d0, v0.2d
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
  ret double %res
}

define double @fmaximumv_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaximumv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fmaxv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
  ret double %res
}

define double @fmaximumv_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaximumv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: fmaxv d0, p0, z0.d
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaximumv_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fmaxv d0, p0, z0.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %op)
  ret double %res
}

define double @fmaximumv_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaximumv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fmaxv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %op)
  ret double %res
}

define double @fmaximumv_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaximumv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fmaxv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %op)
  ret double %res
}

;
; FMINV
;
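
; llvm.vector.reduce.fminimum likewise propagates NaNs, lowering to the plain
; FMIN reductions (NEON FMINV/FMINP, SVE FMINV).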

define half @fminimumv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fminv h0, v0.4h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
  ret half %res
}

define half @fminimumv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fminv h0, v0.8h
; CHECK-NEXT: ret
  %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
  ret half %res
}

define half @fminimumv_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
  ret half %res
}

define half @fminimumv_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminimumv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: fminv h0, p0, z0.h
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminimumv_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fminv h0, p0, z0.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fminimum.v32f16(<32 x half> %op)
  ret half %res
}

define half @fminimumv_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminimumv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fminimum.v64f16(<64 x half> %op)
  ret half %res
}

define half @fminimumv_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminimumv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call half @llvm.vector.reduce.fminimum.v128f16(<128 x half> %op)
  ret half %res
}

; Don't use SVE for 64-bit f32 vectors.
define float @fminimumv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fminp s0, v0.2s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
  ret float %res
}

; Don't use SVE for 128-bit f32 vectors.
define float @fminimumv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fminv s0, v0.4s
; CHECK-NEXT: ret
  %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
  ret float %res
}

define float @fminimumv_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
  ret float %res
}

define float @fminimumv_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminimumv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: fminv s0, p0, z0.s
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminimumv_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fminv s0, p0, z0.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %op)
  ret float %res
}

define float @fminimumv_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminimumv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %op)
  ret float %res
}

define float @fminimumv_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminimumv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %op)
  ret float %res
}

; Nothing to do for single element vectors.
define double @fminimumv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
  ret double %res
}

; Don't use SVE for 128-bit f64 vectors.
define double @fminimumv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fminp d0, v0.2d
; CHECK-NEXT: ret
  %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
  ret double %res
}

define double @fminimumv_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminimumv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
  ret double %res
}

define double @fminimumv_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminimumv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: fminv d0, p0, z0.d
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminimumv_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fminv d0, p0, z0.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %op)
  ret double %res
}

define double @fminimumv_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminimumv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %op)
  ret double %res
}

define double @fminimumv_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminimumv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %op)
  ret double %res
}

attributes #0 = { "target-features"="+sve" }
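
; #0 enables SVE; the per-function vscale_range(N,0) attributes additionally
; promise a minimum vscale of N (i.e. registers of at least N*128 bits) with
; no upper bound, which is what lets each fixed-length type map onto a single
; predicated SVE operation.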

declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)

declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)

declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)

declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
declare half @llvm.vector.reduce.fmax.v32f16(<32 x half>)
declare half @llvm.vector.reduce.fmax.v64f16(<64 x half>)
declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>)

declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>)
declare float @llvm.vector.reduce.fmax.v64f32(<64 x float>)

declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)

declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
declare half @llvm.vector.reduce.fmin.v32f16(<32 x half>)
declare half @llvm.vector.reduce.fmin.v64f16(<64 x half>)
declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>)

declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>)
declare float @llvm.vector.reduce.fmin.v64f32(<64 x float>)

declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)

declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
declare half @llvm.vector.reduce.fmaximum.v32f16(<32 x half>)
declare half @llvm.vector.reduce.fmaximum.v64f16(<64 x half>)
declare half @llvm.vector.reduce.fmaximum.v128f16(<128 x half>)

declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmaximum.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fmaximum.v32f32(<32 x float>)
declare float @llvm.vector.reduce.fmaximum.v64f32(<64 x float>)

declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmaximum.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmaximum.v16f64(<16 x double>)
declare double @llvm.vector.reduce.fmaximum.v32f64(<32 x double>)

declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
declare half @llvm.vector.reduce.fminimum.v32f16(<32 x half>)
declare half @llvm.vector.reduce.fminimum.v64f16(<64 x half>)
declare half @llvm.vector.reduce.fminimum.v128f16(<128 x half>)

declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fminimum.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fminimum.v32f32(<32 x float>)
declare float @llvm.vector.reduce.fminimum.v64f32(<64 x float>)

declare double @llvm.vector.reduce.fminimum.v1f64(<1 x double>)
declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fminimum.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fminimum.v16f64(<16 x double>)
declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>)