
This extends the reduction logic in the vectorizer to handle intrinsic versions of min and max, both the floating point variants already created by instcombine under fastmath and the integer variants from D98152. As a bonus this allows us to match a chain of min or max operations into a single reduction, similar to how add/mul/etc work. Differential Revision: https://reviews.llvm.org/D109645
1105 lines
37 KiB
LLVM
1105 lines
37 KiB
LLVM
; RUN: opt -S -loop-vectorize -dce -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
|
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
|
|
|
@A = common global [1024 x i32] zeroinitializer, align 16
|
|
@fA = common global [1024 x float] zeroinitializer, align 16
|
|
@dA = common global [1024 x double] zeroinitializer, align 16
|
|
|
|
; Signed tests.
|
|
|
|
; Turn this into a max reduction. Make sure we use a splat to initialize the
|
|
; vector for the reduction.
|
|
; CHECK-LABEL: @max_red(
|
|
; CHECK: %[[VAR:.*]] = insertelement <2 x i32> poison, i32 %max, i32 0
|
|
; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> poison, <2 x i32> zeroinitializer
|
|
; CHECK: icmp sgt <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.smax.v2i32
|
|
|
|
define i32 @max_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp sgt i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Turn this into a max reduction. The select has its inputs reversed therefore
|
|
; this is a max reduction.
|
|
; CHECK-LABEL: @max_red_inverse_select(
|
|
; CHECK: icmp slt <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.smax.v2i32
|
|
|
|
define i32 @max_red_inverse_select(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp slt i32 %max.red.08, %0
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Turn this into a min reduction.
|
|
; CHECK-LABEL: @min_red(
|
|
; CHECK: icmp slt <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.smin.v2i32
|
|
|
|
define i32 @min_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp slt i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Turn this into a min reduction. The select has its inputs reversed therefore
|
|
; this is a min reduction.
|
|
; CHECK-LABEL: @min_red_inverse_select(
|
|
; CHECK: icmp sgt <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.smin.v2i32
|
|
|
|
define i32 @min_red_inverse_select(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp sgt i32 %max.red.08, %0
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Unsigned tests.
|
|
|
|
; Turn this into a max reduction.
|
|
; CHECK-LABEL: @umax_red(
|
|
; CHECK: icmp ugt <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.umax.v2i32
|
|
|
|
define i32 @umax_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp ugt i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Turn this into a max reduction. The select has its inputs reversed therefore
|
|
; this is a max reduction.
|
|
; CHECK-LABEL: @umax_red_inverse_select(
|
|
; CHECK: icmp ult <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.umax.v2i32
|
|
|
|
define i32 @umax_red_inverse_select(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp ult i32 %max.red.08, %0
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Turn this into a min reduction.
|
|
; CHECK-LABEL: @umin_red(
|
|
; CHECK: icmp ult <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.umin.v2i32
|
|
|
|
define i32 @umin_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp ult i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Turn this into a min reduction. The select has its inputs reversed therefore
|
|
; this is a min reduction.
|
|
; CHECK-LABEL: @umin_red_inverse_select(
|
|
; CHECK: icmp ugt <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.umin.v2i32
|
|
|
|
define i32 @umin_red_inverse_select(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp ugt i32 %max.red.08, %0
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; SGE -> SLT
|
|
; Turn this into a min reduction (select inputs are reversed).
|
|
; CHECK-LABEL: @sge_min_red(
|
|
; CHECK: icmp sge <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.smin.v2i32
|
|
|
|
define i32 @sge_min_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp sge i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; SLE -> SGT
|
|
; Turn this into a max reduction (select inputs are reversed).
|
|
; CHECK-LABEL: @sle_min_red(
|
|
; CHECK: icmp sle <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.smax.v2i32
|
|
|
|
define i32 @sle_min_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp sle i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; UGE -> ULT
|
|
; Turn this into a min reduction (select inputs are reversed).
|
|
; CHECK-LABEL: @uge_min_red(
|
|
; CHECK: icmp uge <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.umin.v2i32
|
|
|
|
define i32 @uge_min_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp uge i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; ULE -> UGT
|
|
; Turn this into a max reduction (select inputs are reversed).
|
|
; CHECK-LABEL: @ule_min_red(
|
|
; CHECK: icmp ule <2 x i32>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call i32 @llvm.vector.reduce.umax.v2i32
|
|
|
|
define i32 @ule_min_red(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%cmp3 = icmp ule i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; No reduction.
|
|
; CHECK-LABEL: @no_red_1(
|
|
; CHECK-NOT: icmp <2 x i32>
|
|
define i32 @no_red_1(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = load i32, i32* %arrayidx1, align 4
|
|
%cmp3 = icmp sgt i32 %0, %1
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @no_red_2(
|
|
; CHECK-NOT: icmp <2 x i32>
|
|
define i32 @no_red_2(i32 %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
|
%arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = load i32, i32* %arrayidx1, align 4
|
|
%cmp3 = icmp sgt i32 %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, i32 %0, i32 %1
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret i32 %max.red.0
|
|
}
|
|
|
|
; Float tests.
|
|
|
|
; Maximum.
|
|
|
|
; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
|
|
; CHECK-LABEL: @max_red_float(
|
|
; CHECK: fcmp fast ogt <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @max_red_float(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ogt float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @max_red_float_ge(
|
|
; CHECK: fcmp fast oge <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @max_red_float_ge(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast oge float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_max_red_float(
|
|
; CHECK: fcmp fast olt <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @inverted_max_red_float(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast olt float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_max_red_float_le(
|
|
; CHECK: fcmp fast ole <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @inverted_max_red_float_le(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ole float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @unordered_max_red_float(
|
|
; CHECK: fcmp fast ugt <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @unordered_max_red_float(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ugt float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @unordered_max_red_float_ge(
|
|
; CHECK: fcmp fast uge <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @unordered_max_red_float_ge(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast uge float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_unordered_max_red_float(
|
|
; CHECK: fcmp fast ult <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @inverted_unordered_max_red_float(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ult float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
|
|
; CHECK: fcmp fast ule <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
|
|
|
|
define float @inverted_unordered_max_red_float_le(float %max) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ule float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; Minimum.
|
|
|
|
; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
|
|
; CHECK-LABEL: @min_red_float(
|
|
; CHECK: fcmp fast olt <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @min_red_float(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast olt float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @min_red_float_le(
|
|
; CHECK: fcmp fast ole <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @min_red_float_le(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ole float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_min_red_float(
|
|
; CHECK: fcmp fast ogt <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @inverted_min_red_float(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ogt float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_min_red_float_ge(
|
|
; CHECK: fcmp fast oge <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @inverted_min_red_float_ge(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast oge float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @unordered_min_red_float(
|
|
; CHECK: fcmp fast ult <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @unordered_min_red_float(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ult float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @unordered_min_red_float_le(
|
|
; CHECK: fcmp fast ule <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @unordered_min_red_float_le(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ule float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_unordered_min_red_float(
|
|
; CHECK: fcmp fast ugt <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @inverted_unordered_min_red_float(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ugt float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
|
|
; CHECK: fcmp fast uge <2 x float>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
|
|
|
|
define float @inverted_unordered_min_red_float_ge(float %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast uge float %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %min.red.0
|
|
}
|
|
|
|
; Make sure we handle doubles, too.
|
|
; CHECK-LABEL: @min_red_double(
|
|
; CHECK: fcmp fast olt <2 x double>
|
|
; CHECK: select <2 x i1>
|
|
; CHECK: middle.block
|
|
; CHECK: call fast double @llvm.vector.reduce.fmin.v2f64
|
|
|
|
define double @min_red_double(double %min) #0 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
|
|
%0 = load double, double* %arrayidx, align 4
|
|
%cmp3 = fcmp fast olt double %0, %min.red.08
|
|
%min.red.0 = select i1 %cmp3, double %0, double %min.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret double %min.red.0
|
|
}
|
|
|
|
|
|
; Don't this into a max reduction. The no-nans-fp-math attribute is missing
|
|
; CHECK-LABEL: @max_red_float_nans(
|
|
; CHECK-NOT: <2 x float>
|
|
|
|
define float @max_red_float_nans(float %max) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ogt float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; As above, with the no-signed-zeros-fp-math attribute missing
|
|
; CHECK-LABEL: @max_red_float_nsz(
|
|
; CHECK-NOT: <2 x float>
|
|
|
|
define float @max_red_float_nsz(float %max) #1 {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%cmp3 = fcmp fast ogt float %0, %max.red.08
|
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret float %max.red.0
|
|
}
|
|
|
|
; CHECK-LABEL: @smin_intrinsic(
|
|
; CHECK: <2 x i32> @llvm.smin.v2i32
|
|
; CHECK: i32 @llvm.vector.reduce.smin.v2i32
|
|
define i32 @smin_intrinsic(i32* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = tail call i32 @llvm.smin.i32(i32 %s.011, i32 %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret i32 %1
|
|
}
|
|
|
|
; CHECK-LABEL: @smax_intrinsic(
|
|
; CHECK: <2 x i32> @llvm.smax.v2i32
|
|
; CHECK: i32 @llvm.vector.reduce.smax.v2i32
|
|
define i32 @smax_intrinsic(i32* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = tail call i32 @llvm.smax.i32(i32 %s.011, i32 %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret i32 %1
|
|
}
|
|
|
|
; CHECK-LABEL: @umin_intrinsic(
|
|
; CHECK: <2 x i32> @llvm.umin.v2i32
|
|
; CHECK: i32 @llvm.vector.reduce.umin.v2i32
|
|
define i32 @umin_intrinsic(i32* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = tail call i32 @llvm.umin.i32(i32 %s.011, i32 %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret i32 %1
|
|
}
|
|
|
|
; CHECK-LABEL: @umax_intrinsic(
|
|
; CHECK: <2 x i32> @llvm.umax.v2i32
|
|
; CHECK: i32 @llvm.vector.reduce.umax.v2i32
|
|
define i32 @umax_intrinsic(i32* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = tail call i32 @llvm.umax.i32(i32 %s.011, i32 %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret i32 %1
|
|
}
|
|
|
|
; CHECK-LABEL: @fmin_intrinsic(
|
|
; CHECK: nnan nsz <2 x float> @llvm.minnum.v2f32
|
|
; CHECK: nnan nsz float @llvm.vector.reduce.fmin.v2f32
|
|
define float @fmin_intrinsic(float* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret float %1
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%1 = tail call nnan nsz float @llvm.minnum.f32(float %s.011, float %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: @fmax_intrinsic(
|
|
; CHECK: fast <2 x float> @llvm.maxnum.v2f32
|
|
; CHECK: fast float @llvm.vector.reduce.fmax.v2f32
|
|
define float @fmax_intrinsic(float* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret float %1
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%1 = tail call fast float @llvm.maxnum.f32(float %s.011, float %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: @fmin_intrinsic_nofast(
|
|
; CHECK-NOT: <2 x float> @llvm.minnum.v2f32
|
|
define float @fmin_intrinsic_nofast(float* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret float %1
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%1 = tail call float @llvm.minnum.f32(float %s.011, float %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: @fmax_intrinsic_nofast(
|
|
; CHECK-NOT: <2 x float> @llvm.maxnum.v2f32
|
|
define float @fmax_intrinsic_nofast(float* nocapture readonly %x) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret float %1
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, float* %x, i32 %i.012
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%1 = tail call float @llvm.maxnum.f32(float %s.011, float %0)
|
|
%inc = add nuw nsw i32 %i.012, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: @sminmax(
|
|
; Min and max intrinsics - don't vectorize
|
|
; CHECK-NOT: <2 x i32>
|
|
define i32 @sminmax(i32* nocapture readonly %x, i32* nocapture readonly %y) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret i32 %cond9
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.024 = phi i32 [ 0, %entry ], [ %cond9, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.025
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%s.0. = tail call i32 @llvm.smin.i32(i32 %s.024, i32 %0)
|
|
%arrayidx3 = getelementptr inbounds i32, i32* %y, i32 %i.025
|
|
%1 = load i32, i32* %arrayidx3, align 4
|
|
%cond9 = tail call i32 @llvm.smax.i32(i32 %s.0., i32 %1)
|
|
%inc = add nuw nsw i32 %i.025, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; CHECK-LABEL: @sminmin(
|
|
; CHECK: <2 x i32> @llvm.smin.v2i32
|
|
; CHECK: <2 x i32> @llvm.smin.v2i32
|
|
; CHECK: i32 @llvm.vector.reduce.smin.v2i32
|
|
define i32 @sminmin(i32* nocapture readonly %x, i32* nocapture readonly %y) {
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret i32 %cond9
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%s.024 = phi i32 [ 0, %entry ], [ %cond9, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.025
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%s.0. = tail call i32 @llvm.smin.i32(i32 %s.024, i32 %0)
|
|
%arrayidx3 = getelementptr inbounds i32, i32* %y, i32 %i.025
|
|
%1 = load i32, i32* %arrayidx3, align 4
|
|
%cond9 = tail call i32 @llvm.smin.i32(i32 %s.0., i32 %1)
|
|
%inc = add nuw nsw i32 %i.025, 1
|
|
%exitcond.not = icmp eq i32 %inc, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; Make sure any check-not directives are not triggered by function declarations.
|
|
; CHECK: declare
|
|
|
|
declare i32 @llvm.smin.i32(i32, i32)
|
|
declare i32 @llvm.smax.i32(i32, i32)
|
|
declare i32 @llvm.umin.i32(i32, i32)
|
|
declare i32 @llvm.umax.i32(i32, i32)
|
|
declare float @llvm.minnum.f32(float, float)
|
|
declare float @llvm.maxnum.f32(float, float)
|
|
|
|
attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
|
|
attributes #1 = { "no-nans-fp-math"="true" }
|