Philip Reames 269bc684e7 [LV][RISCV] Disable vectorization of epilogue loops
Epilogue loop vectorization is a feature in the vectorize intended to avoid running fully scalar code when the vector length of the main loop turns out to be either longer than the trip count of the actual loop, or with a huge remainder.

In practice, this feature appears to not have been well tuned. I honestly don't think it should be on by default at all, but it definitely shouldn't be on for RISCV. Note that other targets have also disabled it, but they've done so via disabling interleaving - which is, well, completely unrelated - and we don't want to do that for RISCV.

In the near term, many examples I'm seeing have terrible codegen for epilogue vectorization. We are greatly increasing code size for little value at reasonable VLEN values for small types. In the long term, the cases that epilogue vectorization are intended to handle are likely better handled via tail folding on RISCV.

As an aside, I also don't really trust the correctness of epilogue vectorization. The code structure is such that otherwise straight forward changes sometimes break only epilogue vectorization. The reuse of an existing vplan without careful validation opens significant room for nasty bugs. Given how rarely the code is exercised, that is not a good combination.

As such, this patch introduces a TTI hook, and completely disables epilogue vectorization on RISCV.

Differential Revision: https://reviews.llvm.org/D136695
2022-10-25 14:28:02 -07:00

1093 lines
66 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s
; RUN: opt < %s -loop-vectorize -scalable-vectorization=off -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck --check-prefix=FIXED %s
; Tests specific to div/rem handling - both predicated and not
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_udiv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]]
; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @vector_udiv(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FIXED-NEXT: [[TMP7:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]]
; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%divrem = udiv i64 %elem, %v
store i64 %divrem, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_sdiv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]]
; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @vector_sdiv(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FIXED-NEXT: [[TMP7:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]]
; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%divrem = sdiv i64 %elem, %v
store i64 %divrem, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_urem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = urem <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[DIVREM:%.*]] = urem i64 [[ELEM]], [[V]]
; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @vector_urem(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = urem <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FIXED-NEXT: [[TMP7:%.*]] = urem <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[DIVREM:%.*]] = urem i64 [[ELEM]], [[V]]
; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%divrem = urem i64 %elem, %v
store i64 %divrem, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_srem(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = srem <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[DIVREM:%.*]] = srem i64 [[ELEM]], [[V]]
; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @vector_srem(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = srem <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FIXED-NEXT: [[TMP7:%.*]] = srem <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[DIVREM:%.*]] = srem i64 [[ELEM]], [[V]]
; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%divrem = srem i64 %elem, %v
store i64 %divrem, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @predicated_udiv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0
; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; CHECK: do_op:
; CHECK-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]]
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @predicated_udiv(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT3]], zeroinitializer
; FIXED-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> <i64 1, i64 1>
; FIXED-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[BROADCAST_SPLAT3]], <2 x i64> <i64 1, i64 1>
; FIXED-NEXT: [[TMP10:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], [[TMP8]]
; FIXED-NEXT: [[TMP11:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], [[TMP9]]
; FIXED-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
; FIXED-NEXT: [[TMP13:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP10]], <2 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0
; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; FIXED: do_op:
; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]]
; FIXED-NEXT: br label [[LATCH]]
; FIXED: latch:
; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%c = icmp ne i64 %v, 0
br i1 %c, label %do_op, label %latch
do_op:
%divrem = udiv i64 %elem, %v
br label %latch
latch:
%phi = phi i64 [%elem, %for.body], [%divrem, %do_op]
store i64 %phi, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @predicated_sdiv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0
; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; CHECK: do_op:
; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]]
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @predicated_sdiv(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT3]], zeroinitializer
; FIXED-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> <i64 1, i64 1>
; FIXED-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[BROADCAST_SPLAT3]], <2 x i64> <i64 1, i64 1>
; FIXED-NEXT: [[TMP10:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], [[TMP8]]
; FIXED-NEXT: [[TMP11:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], [[TMP9]]
; FIXED-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
; FIXED-NEXT: [[TMP13:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP10]], <2 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0
; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; FIXED: do_op:
; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]]
; FIXED-NEXT: br label [[LATCH]]
; FIXED: latch:
; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%c = icmp ne i64 %v, 0
br i1 %c, label %do_op, label %latch
do_op:
%divrem = sdiv i64 %elem, %v
br label %latch
latch:
%phi = phi i64 [%elem, %for.body], [%divrem, %do_op]
store i64 %phi, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
; CHECK-LABEL: @predicated_udiv_by_constant(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP6:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP6]], <vscale x 1 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42
; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; CHECK: do_op:
; CHECK-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], 27
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @predicated_udiv_by_constant(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD1]], <i64 42, i64 42>
; FIXED-NEXT: [[TMP8:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], <i64 27, i64 27>
; FIXED-NEXT: [[TMP9:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], <i64 27, i64 27>
; FIXED-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
; FIXED-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP9]], <2 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42
; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; FIXED: do_op:
; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], 27
; FIXED-NEXT: br label [[LATCH]]
; FIXED: latch:
; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%c = icmp ne i64 %elem, 42
br i1 %c, label %do_op, label %latch
do_op:
%divrem = udiv i64 %elem, 27
br label %latch
latch:
%phi = phi i64 [%elem, %for.body], [%divrem, %do_op]
store i64 %phi, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
; CHECK-LABEL: @predicated_sdiv_by_constant(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP6:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP6]], <vscale x 1 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42
; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; CHECK: do_op:
; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], 27
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @predicated_sdiv_by_constant(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD1]], <i64 42, i64 42>
; FIXED-NEXT: [[TMP8:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], <i64 27, i64 27>
; FIXED-NEXT: [[TMP9:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], <i64 27, i64 27>
; FIXED-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
; FIXED-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP9]], <2 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
; FIXED-NEXT: store <2 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42
; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; FIXED: do_op:
; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], 27
; FIXED-NEXT: br label [[LATCH]]
; FIXED: latch:
; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%elem = load i64, ptr %arrayidx
%c = icmp ne i64 %elem, 42
br i1 %c, label %do_op, label %latch
do_op:
%divrem = sdiv i64 %elem, 27
br label %latch
latch:
%phi = phi i64 [%elem, %for.body], [%divrem, %do_op]
store i64 %phi, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}
define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; CHECK-LABEL: @predicated_sdiv_by_minus_one(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP14]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP18:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP20:%.*]] = sdiv <vscale x 8 x i8> [[WIDE_LOAD]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = sdiv <vscale x 8 x i8> [[WIDE_LOAD1]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = xor <vscale x 8 x i1> [[TMP16]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP23:%.*]] = xor <vscale x 8 x i1> [[TMP17]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i8> [[TMP20]], <vscale x 8 x i8> [[WIDE_LOAD]]
; CHECK-NEXT: [[PREDPHI2:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i8> [[TMP21]], <vscale x 8 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: store <vscale x 8 x i8> [[PREDPHI]], ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], 8
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP25]]
; CHECK-NEXT: store <vscale x 8 x i8> [[PREDPHI2]], ptr [[TMP26]], align 1
; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[ELEM]], -128
; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; CHECK: do_op:
; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i8 [[ELEM]], -1
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; CHECK-NEXT: store i8 [[PHI]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; FIXED-LABEL: @predicated_sdiv_by_minus_one(
; FIXED-NEXT: entry:
; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; FIXED: vector.ph:
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <16 x i8> [[WIDE_LOAD1]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
; FIXED-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; FIXED-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; FIXED-NEXT: [[TMP10:%.*]] = sdiv <16 x i8> [[WIDE_LOAD]], [[TMP8]]
; FIXED-NEXT: [[TMP11:%.*]] = sdiv <16 x i8> [[WIDE_LOAD1]], [[TMP9]]
; FIXED-NEXT: [[TMP12:%.*]] = xor <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; FIXED-NEXT: [[TMP13:%.*]] = xor <16 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; FIXED-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP10]], <16 x i8> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[WIDE_LOAD1]]
; FIXED-NEXT: store <16 x i8> [[PREDPHI]], ptr [[TMP4]], align 1
; FIXED-NEXT: store <16 x i8> [[PREDPHI2]], ptr [[TMP5]], align 1
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; FIXED: middle.block:
; FIXED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; FIXED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; FIXED: scalar.ph:
; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; FIXED-NEXT: br label [[FOR_BODY:%.*]]
; FIXED: for.body:
; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; FIXED-NEXT: [[ELEM:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; FIXED-NEXT: [[C:%.*]] = icmp ne i8 [[ELEM]], -128
; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]]
; FIXED: do_op:
; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i8 [[ELEM]], -1
; FIXED-NEXT: br label [[LATCH]]
; FIXED: latch:
; FIXED-NEXT: [[PHI:%.*]] = phi i8 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ]
; FIXED-NEXT: store i8 [[PHI]], ptr [[ARRAYIDX]], align 1
; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; FIXED: for.end:
; FIXED-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %iv
%elem = load i8, ptr %arrayidx
%c = icmp ne i8 %elem, 128
br i1 %c, label %do_op, label %latch
do_op:
%divrem = sdiv i8 %elem, -1 ;; UB if %elem = INT_MIN
br label %latch
latch:
%phi = phi i8 [%elem, %for.body], [%divrem, %do_op]
store i8 %phi, ptr %arrayidx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
for.end:
ret void
}